Completed
Push — dev ( f143c8...357bd8 )
by
unknown
01:19 queued 01:03
created

data.datasets.power_plants.wind_farms.insert()   C

Complexity

Conditions 6

Size

Total Lines 118
Code Lines 72

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 72
dl 0
loc 118
rs 6.983
c 0
b 0
f 0
cc 6
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
import re
2
3
from matplotlib import pyplot as plt
4
from shapely.geometry import MultiPoint, Point
5
import geopandas as gpd
6
import numpy as np
7
import pandas as pd
8
9
from egon.data import db
10
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
11
import egon.data.config
12
13
14
def insert():
15
    """Main function. Import power objectives generate results calling the
16
    functions "generate_wind_farms" and  "wind_power_states".
17
18
    Parameters
19
    ----------
20
    *No parameters required
21
22
    """
23
24
    con = db.engine()
25
26
    # federal_std has the shapes of the German states
27
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
28
    federal_std = gpd.GeoDataFrame.from_postgis(
29
        sql, con, geom_col="geometry", crs=4326
30
    )
31
32
    # target_power_df has the expected capacity of each federal state
33
    sql = (
34
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
35
        "supply.egon_scenario_capacities"
36
    )
37
    target_power_df = pd.read_sql(sql, con)
38
39
    # mv_districts has geographic info of medium voltage districts in Germany
40
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
41
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
42
43
    # Delete all the water bodies from the federal states shapes
44
    federal_std = federal_std[federal_std["gf"] == 4]
45
    federal_std.drop(columns=["gf"], inplace=True)
46
    # Filter the potential expected from wind_onshore
47
    target_power_df = target_power_df[
48
        target_power_df["carrier"] == "wind_onshore"
49
    ]
50
    target_power_df = target_power_df[
51
        target_power_df["scenario_name"].isin(["eGon2035", "eGon100RE"])
52
    ]
53
    target_power_df.set_index("nuts", inplace=True)
54
    target_power_df["geom"] = Point(0, 0)
55
56
    # Join the geometries which belong to the same states
57
    for std in target_power_df.index:
58
        df = federal_std[federal_std["nuts"] == std]
59
        if df.size > 0:
60
            target_power_df.at[std, "name"] = df["gen"].iat[0]
61
        else:
62
            target_power_df.at[std, "name"] = np.nan
63
        target_power_df.at[std, "geom"] = df.unary_union
64
    target_power_df = gpd.GeoDataFrame(
65
        target_power_df, geometry="geom", crs=4326
66
    )
67
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
68
    target_power_df = target_power_df.to_crs(3035)
69
70
    # Create the shape for full Germany
71
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
72
    target_power_df.at["DE", "name"] = "Germany"
73
    # Generate WFs for Germany based on potential areas and existing WFs
74
    wf_areas, wf_areas_ni = generate_wind_farms()
75
76
    # Change the columns "geometry" of this GeoDataFrames
77
    wf_areas.set_geometry("centroid", inplace=True)
78
    wf_areas_ni.set_geometry("centroid", inplace=True)
79
80
    # Create centroids of mv_districts to apply the clip function
81
    mv_districts["centroid"] = mv_districts.centroid
82
    mv_districts.set_geometry("centroid", inplace=True)
83
84
    summary_t = pd.DataFrame()
85
    farms = pd.DataFrame()
86
87
    if "eGon100RE" in target_power_df["scenario_name"].values:
88
        wind_farms_state, summary_state = wind_power_states(
89
            wf_areas,
90
            wf_areas_ni,
91
            mv_districts,
92
            target_power_df.at["DE", "capacity"],
93
            "eGon100RE",
94
            "wind_onshore",
95
            "DE",
96
        )
97
        target_power_df = target_power_df[
98
            target_power_df["scenario_name"] != "eGon100RE"
99
        ]
100
101
    if "eGon2035" in target_power_df["scenario_name"].values:
102
        # Fit wind farms scenarions for each one of the states
103
        for bundesland in target_power_df.index:
104
            state_wf = gpd.clip(
105
                wf_areas, target_power_df.at[bundesland, "geom"]
106
            )
107
            state_wf_ni = gpd.clip(
108
                wf_areas_ni, target_power_df.at[bundesland, "geom"]
109
            )
110
            state_mv_districts = gpd.clip(
111
                mv_districts, target_power_df.at[bundesland, "geom"]
112
            )
113
            target_power = target_power_df.at[bundesland, "capacity"]
114
            scenario_year = target_power_df.at[bundesland, "scenario_name"]
115
            source = target_power_df.at[bundesland, "carrier"]
116
            fed_state = target_power_df.at[bundesland, "name"]
117
            wind_farms_state, summary_state = wind_power_states(
118
                state_wf,
119
                state_wf_ni,
120
                state_mv_districts,
121
                target_power,
122
                scenario_year,
123
                source,
124
                fed_state,
125
            )
126
            summary_t = pd.concat([summary_t, summary_state])
127
            farms = pd.concat([farms, wind_farms_state])
128
129
    generate_map()
130
131
    return
132
133
134
def generate_wind_farms():
135
    """Generate wind farms based on existing wind farms.
136
137
    Parameters
138
    ----------
139
    *No parameters required
140
141
    """
142
    # get config
143
    cfg = egon.data.config.datasets()["power_plants"]
144
145
    # Due to typos in some inputs, some areas of existing wind farms
146
    # should be discarded using perimeter and area filters
147
    def filter_current_wf(wf_geometry):
148
        if wf_geometry.geom_type == "Point":
149
            return True
150
        if wf_geometry.geom_type == "Polygon":
151
            area = wf_geometry.area
152
            length = wf_geometry.length
153
            # Filter based on the biggest (# of WT) wind farm
154
            return (area < 40000000) & (length < 40000)
155
        if wf_geometry.geom_type == "LineString":
156
            length = wf_geometry.length
157
            return length < 1008  # 8 * rotor diameter (8*126m)
158
159
    # The function 'wind_farm' returns the connection point of a wind turbine
160
    def wind_farm(x):
161
        try:
162
            return map_ap_wea_farm[x]
163
        except:
164
            return np.nan
165
166
    # The function 'voltage' returns the voltage level a wind turbine operates
167
    def voltage(x):
168
        try:
169
            return map_ap_wea_voltage[x]
170
        except:
171
            return np.nan
172
173
    # Connect to the data base
174
    con = db.engine()
175
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
176
    # wf_areas has all the potential areas geometries for wind farms
177
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
178
    # bus has the connection points of the wind farms
179
    bus = pd.read_csv(
180
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
181
        index_col="MaStRNummer",
182
    )
183
    # Drop all the rows without connection point
184
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
185
    # wea has info of each wind turbine in Germany.
186
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
187
188
    # Delete all the rows without information about geographical location
189
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
190
    # Delete all the offshore wind turbines
191
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
192
    # the variable map_ap_wea_farm have the connection point of all the
193
    # available wt in the dataframe bus.
194
    map_ap_wea_farm = {}
195
    map_ap_wea_voltage = {}
196
197
    wea["connection point"] = wea["LokationMastrNummer"].map(
198
        bus["NetzanschlusspunktMastrNummer"]
199
    )
200
    wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])
201
202
    # Create the columns 'geometry' which will have location of each WT in a
203
    # point type
204
    wea = gpd.GeoDataFrame(
205
        wea,
206
        geometry=gpd.points_from_xy(
207
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
208
        ),
209
    )
210
211
    # wf_size storages the number of WT connected to each connection point
212
    wf_size = wea["connection point"].value_counts()
213
    # Delete all the connection points with less than 3 WT
214
    wf_size = wf_size[wf_size >= 3]
215
    # Filter all the WT which are not part of a wind farm of at least 3 WT
216
    wea = wea[wea["connection point"].isin(wf_size.index)]
217
    # current_wfs has all the geometries that represent the existing wind
218
    # farms
219
    current_wfs = gpd.GeoDataFrame(
220
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
221
    )
222
    for conn_point, wt_location in wea.groupby("connection point"):
223
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
224
            wt_location["geometry"].values
225
        ).convex_hull
226
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
227
228
    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
229
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
230
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
231
    # The 'filter_wts' is used to discard atypical values for the current wind
232
    # farms
233
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
234
        lambda x: filter_current_wf(x)
235
    )
236
237
    # Apply the filter based on area and perimeter
238
    current_wfs = current_wfs[current_wfs["filter2"]]
239
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
240
241
    wf_areas["area [km²]"] = wf_areas.area / 1000000
242
243
    # Exclude areas smaller than X m². X was calculated as the area of
244
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
245
    min_area = 4 * (0.126**2) * np.sqrt(3)
246
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
247
248
    # Find the centroid of all the suitable potential areas
249
    wf_areas["centroid"] = wf_areas.centroid
250
251
    # find the potential areas that intersects the convex hulls of current
252
    # wind farms and assign voltage levels
253
    wf_areas = wf_areas.to_crs(4326)
254
    for i in wf_areas.index:
255
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
256
        if intersection.any() == False:
257
            wf_areas.at[i, "voltage"] = "No Intersection"
258
        else:
259
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
260
261
    # wf_areas_ni has the potential areas which don't intersect any current
262
    # wind farm
263
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
264
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
265
    return wf_areas, wf_areas_ni
266
267
268
def wind_power_states(
269
    state_wf,
270
    state_wf_ni,
271
    state_mv_districts,
272
    target_power,
273
    scenario_year,
274
    source,
275
    fed_state,
276
):
277
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
278
    database.
279
280
    Parameters
281
    ----------
282
    state_wf: geodataframe, mandatory
283
        gdf containing all the wf in the state created based on existing wf.
284
    state_wf_ni: geodataframe, mandatory
285
        potential areas in the the state wich don't intersect any existing wf
286
    state_mv_districts: geodataframe, mandatory
287
        gdf containing all the MV/HV substations in the state
288
    target_power: int, mandatory
289
        Objective power for a state given in MW
290
    scenario_year: str, mandatory
291
        name of the scenario
292
    source: str, mandatory
293
        Type of energy genetor. Always "Wind_onshore" for this script.
294
    fed_state: str, mandatory
295
        Name of the state where the wind farms will be allocated
296
297
    """
298
299
    def match_district_se(x):
300
        for sub in hvmv_substation.index:
301
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
302
                return hvmv_substation.at[sub, "point"]
303
304
    con = db.engine()
305
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
306
    # hvmv_substation has the information about HV transmission lines in
307
    # Germany
308
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
309
310
    # Set wind potential depending on geographical location
311
    power_north = 21.05  # MW/km²
312
    power_south = 16.81  # MW/km²
313
    # Set a maximum installed capacity to limit the power of big potential
314
    # areas
315
    max_power_hv = 120  # in MW
316
    max_power_mv = 20  # in MW
317
    # Max distance between WF (connected to MV) and nearest HV substation
318
    # that allows its connection to HV.
319
    max_dist_hv = 20000  # in meters
320
321
    summary = pd.DataFrame(
322
        columns=["state", "target", "from existin WF", "MV districts"]
323
    )
324
325
    north = [
326
        "Schleswig-Holstein",
327
        "Mecklenburg-Vorpommern",
328
        "Niedersachsen",
329
        "Bremen",
330
        "Hamburg",
331
    ]
332
333
    if fed_state == "DE":
334
        sql = f"""SELECT * FROM boundaries.vg250_lan
335
        WHERE gen in {tuple(north)}
336
        """
337
        north_states = gpd.GeoDataFrame.from_postgis(
338
            sql, con, geom_col="geometry"
339
        )
340
        north_states.to_crs(3035, inplace=True)
341
        state_wf["nord"] = state_wf.within(north_states.unary_union)
342
        state_wf["inst capacity [MW]"] = state_wf.apply(
343
            lambda x: (
344
                power_north * x["area [km²]"]
345
                if x["nord"]
346
                else power_south * x["area [km²]"]
347
            ),
348
            axis=1,
349
        )
350
    else:
351
        if fed_state in north:
352
            state_wf["inst capacity [MW]"] = (
353
                power_north * state_wf["area [km²]"]
354
            )
355
        else:
356
            state_wf["inst capacity [MW]"] = (
357
                power_south * state_wf["area [km²]"]
358
            )
359
360
    # Divide selected areas based on voltage of connection points
361
    wf_mv = state_wf[
362
        (state_wf["voltage"] != "Hochspannung")
363
        & (state_wf["voltage"] != "Hoechstspannung")
364
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
365
    ]
366
367
    wf_hv = state_wf[
368
        (state_wf["voltage"] == "Hochspannung")
369
        | (state_wf["voltage"] == "Hoechstspannung")
370
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
371
    ]
372
373
    # Wind farms connected to MV network will be connected to HV network if
374
    # the distance to the closest HV substation is =< max_dist_hv, and the
375
    # installed capacity is bigger than max_power_mv
376
    hvmv_substation = hvmv_substation.to_crs(3035)
377
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
378
        lambda x: int(re.split(";|:", x)[0])
379
    )
380
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
381
    hv_substations = hv_substations.unary_union  # join all the hv_substations
382
    wf_mv["dist_to_HV"] = (
383
        state_wf["geom"].to_crs(3035).distance(hv_substations)
384
    )
385
    wf_mv_to_hv = wf_mv[
386
        (wf_mv["dist_to_HV"] <= max_dist_hv)
387
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
388
    ]
389
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
390
    wf_mv_to_hv["voltage"] = "Hochspannung"
391
392
    wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
393
    wf_mv = wf_mv[
394
        (wf_mv["dist_to_HV"] > max_dist_hv)
395
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
396
    ]
397
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])
398
399
    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
400
        lambda x: x if x < max_power_hv else max_power_hv
401
    )
402
403
    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
404
        lambda x: x if x < max_power_mv else max_power_mv
405
    )
406
407
    wind_farms = pd.concat([wf_hv, wf_mv])
408
409
    # Adjust the total installed capacity to the scenario
410
    total_wind_power = (
411
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
412
    )
413
414
    if total_wind_power > target_power:
415
        scale_factor = target_power / total_wind_power
416
        wf_mv["inst capacity [MW]"] = (
417
            wf_mv["inst capacity [MW]"] * scale_factor
418
        )
419
        wf_hv["inst capacity [MW]"] = (
420
            wf_hv["inst capacity [MW]"] * scale_factor
421
        )
422
        wind_farms = pd.concat([wf_hv, wf_mv])
423
        summary = pd.concat(
424
            [
425
                summary,
426
                pd.DataFrame(
427
                    index=[summary.index.max() + 1],
428
                    data={
429
                        "state": fed_state,
430
                        "target": target_power,
431
                        "from existin WF": wind_farms[
432
                            "inst capacity [MW]"
433
                        ].sum(),
434
                        "MV districts": 0,
435
                    },
436
                ),
437
            ],
438
            ignore_index=True,
439
        )
440
    else:
441
        extra_wf = state_mv_districts.copy()
442
        extra_wf = extra_wf.set_geometry("geom")
443
        extra_wf["area [km²]"] = 0.0
444
        for district in extra_wf.index:
445
            try:
446
                pot_area_district = gpd.clip(
447
                    state_wf_ni, extra_wf.at[district, "geom"]
448
                )
449
                extra_wf.at[district, "area [km²]"] = pot_area_district[
450
                    "area [km²]"
451
                ].sum()
452
            except:
453
                print(district)
454
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
455
        total_new_area = extra_wf["area [km²]"].sum()
456
        scale_factor = (target_power - total_wind_power) / total_new_area
457
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
458
        extra_wf["voltage"] = "Hochspannung"
459
        summary = pd.concat(
460
            [
461
                summary,
462
                pd.DataFrame(
463
                    index=[summary.index.max() + 1],
464
                    data={
465
                        "state": fed_state,
466
                        "target": target_power,
467
                        "from existin WF": wind_farms[
468
                            "inst capacity [MW]"
469
                        ].sum(),
470
                        "MV districts": extra_wf["inst capacity [MW]"].sum(),
471
                    },
472
                ),
473
            ],
474
            ignore_index=True,
475
        )
476
        extra_wf.to_crs(4326, inplace=True)
477
        wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)
478
479
    # Use Definition of thresholds for voltage level assignment
480
    wind_farms["voltage_level"] = 0
481
    for i in wind_farms.index:
482
        try:
483
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
484
                wind_farms.at[i, "voltage_level"] = 5
485
                continue
486
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
487
                wind_farms.at[i, "voltage_level"] = 4
488
                continue
489
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
490
                wind_farms.at[i, "voltage_level"] = 3
491
                continue
492
        except:
493
            print(i)
494
495
    # Look for the maximum id in the table egon_power_plants
496
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
497
    max_id = pd.read_sql(sql, con)
498
    max_id = max_id["max"].iat[0]
499
    if max_id is None:
500
        wind_farm_id = 1
501
    else:
502
        wind_farm_id = int(max_id + 1)
503
504
    # write_table in egon-data database:
505
506
    # Copy relevant columns from wind_farms
507
    insert_wind_farms = wind_farms[
508
        ["inst capacity [MW]", "voltage_level", "centroid"]
509
    ]
510
511
    # Set static column values
512
    insert_wind_farms["carrier"] = source
513
    insert_wind_farms["scenario"] = scenario_year
514
515
    # Change name and crs of geometry column
516
    insert_wind_farms = (
517
        insert_wind_farms.rename(
518
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
519
        )
520
        .set_geometry("geom")
521
        .to_crs(4326)
522
    )
523
524
    # Reset index
525
    insert_wind_farms.index = pd.RangeIndex(
526
        start=wind_farm_id,
527
        stop=wind_farm_id + len(insert_wind_farms),
528
        name="id",
529
    )
530
531
    # Delete old wind_onshore generators
532
    db.execute_sql(
533
        f"""DELETE FROM supply.egon_power_plants
534
        WHERE carrier = 'wind_onshore'
535
        AND scenario = '{scenario_year}'
536
        """
537
    )
538
539
    # Insert into database
540
    insert_wind_farms.reset_index().to_postgis(
541
        "egon_power_plants",
542
        schema="supply",
543
        con=db.engine(),
544
        if_exists="append",
545
    )
546
    return wind_farms, summary
547
548
549
def generate_map():
550
    """Generates a map with the position of all the wind farms
551
552
    Parameters
553
    ----------
554
    *No parameters required
555
556
    """
557
    con = db.engine()
558
559
    # Import wind farms from egon-data
560
    sql = (
561
        "SELECT  carrier, el_capacity, geom, scenario FROM "
562
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
563
    )
564
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
565
        sql, con, geom_col="geom", crs=4326
566
    )
567
    wind_farms_t = wind_farms_t.to_crs(3035)
568
569
    for scenario in wind_farms_t.scenario.unique():
570
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
571
        # mv_districts has geographic info of medium voltage districts in
572
        # Germany
573
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
574
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
575
        mv_districts = mv_districts.to_crs(3035)
576
577
        mv_districts["power"] = 0.0
578
        for std in mv_districts.index:
579
            try:
580
                mv_districts.at[std, "power"] = gpd.clip(
581
                    wind_farms, mv_districts.at[std, "geom"]
582
                ).el_capacity.sum()
583
            except:
584
                print(std)
585
586
        fig, ax = plt.subplots(1, 1)
587
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
588
        mv_districts.plot(
589
            ax=ax,
590
            column="power",
591
            cmap="magma_r",
592
            legend=True,
593
            legend_kwds={
594
                "label": "Installed capacity in MW",
595
                "orientation": "vertical",
596
            },
597
        )
598
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
599
    return 0
600