Completed
Push — dev ( 8582b4...82307e )
by
unknown
30s queued 19s
created

data.datasets.power_plants.wind_farms   B

Complexity

Total Complexity 43

Size/Duplication

Total Lines 595
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 43
eloc 354
dl 0
loc 595
rs 8.96
c 0
b 0
f 0

4 Functions

Rating   Name   Duplication   Size   Complexity  
B insert() 0 116 6
F wind_power_states() 0 278 21
A generate_map() 0 51 4
D generate_wind_farms() 0 132 12

How to fix   Complexity   

Complexity

Complex classes like data.datasets.power_plants.wind_farms often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from matplotlib import pyplot as plt
2
from shapely.geometry import MultiPoint, Point
3
import geopandas as gpd
4
import numpy as np
5
import pandas as pd
6
7
from egon.data import db
8
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
9
import egon.data.config
10
11
12
def insert():
13
    """Main function. Import power objectives generate results calling the
14
    functions "generate_wind_farms" and  "wind_power_states".
15
16
    Parameters
17
    ----------
18
    *No parameters required
19
20
    """
21
22
    con = db.engine()
23
24
    # federal_std has the shapes of the German states
25
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
26
    federal_std = gpd.GeoDataFrame.from_postgis(
27
        sql, con, geom_col="geometry", crs=4326
28
    )
29
30
    # target_power_df has the expected capacity of each federal state
31
    sql = (
32
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
33
        "supply.egon_scenario_capacities"
34
    )
35
    target_power_df = pd.read_sql(sql, con)
36
37
    # mv_districts has geographic info of medium voltage districts in Germany
38
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
39
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
40
41
    # Delete all the water bodies from the federal states shapes
42
    federal_std = federal_std[federal_std["gf"] == 4]
43
    federal_std.drop(columns=["gf"], inplace=True)
44
    # Filter the potential expected from wind_onshore
45
    target_power_df = target_power_df[
46
        target_power_df["carrier"] == "wind_onshore"
47
    ]
48
    target_power_df = target_power_df[
49
        target_power_df["scenario_name"].isin(["eGon2035", "eGon100RE"])
50
    ]
51
    target_power_df.set_index("nuts", inplace=True)
52
    target_power_df["geom"] = Point(0, 0)
53
54
    # Join the geometries which belong to the same states
55
    for std in target_power_df.index:
56
        df = federal_std[federal_std["nuts"] == std]
57
        if df.size > 0:
58
            target_power_df.at[std, "name"] = df["gen"].iat[0]
59
        else:
60
            target_power_df.at[std, "name"] = np.nan
61
        target_power_df.at[std, "geom"] = df.unary_union
62
    target_power_df = gpd.GeoDataFrame(
63
        target_power_df, geometry="geom", crs=4326
64
    )
65
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
66
    target_power_df = target_power_df.to_crs(3035)
67
68
    # Create the shape for full Germany
69
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
70
    target_power_df.at["DE", "name"] = "Germany"
71
    # Generate WFs for Germany based on potential areas and existing WFs
72
    wf_areas, wf_areas_ni = generate_wind_farms()
73
74
    # Change the columns "geometry" of this GeoDataFrames
75
    wf_areas.set_geometry("centroid", inplace=True)
76
    wf_areas_ni.set_geometry("centroid", inplace=True)
77
78
    # Create centroids of mv_districts to apply the clip function
79
    mv_districts["centroid"] = mv_districts.centroid
80
    mv_districts.set_geometry("centroid", inplace=True)
81
82
    summary_t = pd.DataFrame()
83
    farms = pd.DataFrame()
84
85
    if "eGon100RE" in target_power_df["scenario_name"].values:
86
        wind_farms_state, summary_state = wind_power_states(
87
            wf_areas,
88
            wf_areas_ni,
89
            mv_districts,
90
            target_power_df.at["DE", "capacity"],
91
            "eGon100RE",
92
            "wind_onshore",
93
            "DE",
94
        )
95
        target_power_df = target_power_df[
96
            target_power_df["scenario_name"] != "eGon100RE"
97
        ]
98
99
    if "eGon2035" in target_power_df["scenario_name"].values:
100
        # Fit wind farms scenarions for each one of the states
101
        for bundesland in target_power_df.index:
102
            state_wf = gpd.clip(wf_areas, target_power_df.at[bundesland, "geom"])
103
            state_wf_ni = gpd.clip(
104
                wf_areas_ni, target_power_df.at[bundesland, "geom"]
105
            )
106
            state_mv_districts = gpd.clip(
107
                mv_districts, target_power_df.at[bundesland, "geom"]
108
            )
109
            target_power = target_power_df.at[bundesland, "capacity"]
110
            scenario_year = target_power_df.at[bundesland, "scenario_name"]
111
            source = target_power_df.at[bundesland, "carrier"]
112
            fed_state = target_power_df.at[bundesland, "name"]
113
            wind_farms_state, summary_state = wind_power_states(
114
                state_wf,
115
                state_wf_ni,
116
                state_mv_districts,
117
                target_power,
118
                scenario_year,
119
                source,
120
                fed_state,
121
            )
122
            summary_t = pd.concat([summary_t, summary_state])
123
            farms = pd.concat([farms, wind_farms_state])
124
125
    generate_map()
126
127
    return
128
129
130
def generate_wind_farms():
131
    """Generate wind farms based on existing wind farms.
132
133
    Parameters
134
    ----------
135
    *No parameters required
136
137
    """
138
    # get config
139
    cfg = egon.data.config.datasets()["power_plants"]
140
141
    # Due to typos in some inputs, some areas of existing wind farms
142
    # should be discarded using perimeter and area filters
143
    def filter_current_wf(wf_geometry):
144
        if wf_geometry.geom_type == "Point":
145
            return True
146
        if wf_geometry.geom_type == "Polygon":
147
            area = wf_geometry.area
148
            length = wf_geometry.length
149
            # Filter based on the biggest (# of WT) wind farm
150
            return (area < 40000000) & (length < 40000)
151
        if wf_geometry.geom_type == "LineString":
152
            length = wf_geometry.length
153
            return length < 1008  # 8 * rotor diameter (8*126m)
154
155
    # The function 'wind_farm' returns the connection point of a wind turbine
156
    def wind_farm(x):
157
        try:
158
            return map_ap_wea_farm[x]
159
        except:
160
            return np.nan
161
162
    # The function 'voltage' returns the voltage level a wind turbine operates
163
    def voltage(x):
164
        try:
165
            return map_ap_wea_voltage[x]
166
        except:
167
            return np.nan
168
169
    # Connect to the data base
170
    con = db.engine()
171
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
172
    # wf_areas has all the potential areas geometries for wind farms
173
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
174
    # bus has the connection points of the wind farms
175
    bus = pd.read_csv(
176
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
177
        index_col="MaStRNummer",
178
    )
179
    # Drop all the rows without connection point
180
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
181
    # wea has info of each wind turbine in Germany.
182
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
183
184
    # Delete all the rows without information about geographical location
185
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
186
    # Delete all the offshore wind turbines
187
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
188
    # the variable map_ap_wea_farm have the connection point of all the
189
    # available wt in the dataframe bus.
190
    map_ap_wea_farm = {}
191
    map_ap_wea_voltage = {}
192
193
    wea["connection point"] = wea["LokationMastrNummer"].map(
194
        bus["NetzanschlusspunktMastrNummer"]
195
    )
196
    wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])
197
198
    # Create the columns 'geometry' which will have location of each WT in a
199
    # point type
200
    wea = gpd.GeoDataFrame(
201
        wea,
202
        geometry=gpd.points_from_xy(
203
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
204
        ),
205
    )
206
207
    # wf_size storages the number of WT connected to each connection point
208
    wf_size = wea["connection point"].value_counts()
209
    # Delete all the connection points with less than 3 WT
210
    wf_size = wf_size[wf_size >= 3]
211
    # Filter all the WT which are not part of a wind farm of at least 3 WT
212
    wea = wea[wea["connection point"].isin(wf_size.index)]
213
    # current_wfs has all the geometries that represent the existing wind
214
    # farms
215
    current_wfs = gpd.GeoDataFrame(
216
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
217
    )
218
    for conn_point, wt_location in wea.groupby("connection point"):
219
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
220
            wt_location["geometry"].values
221
        ).convex_hull
222
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
223
224
    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
225
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
226
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
227
    # The 'filter_wts' is used to discard atypical values for the current wind
228
    # farms
229
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
230
        lambda x: filter_current_wf(x)
231
    )
232
233
    # Apply the filter based on area and perimeter
234
    current_wfs = current_wfs[current_wfs["filter2"]]
235
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
236
237
    wf_areas["area [km²]"] = wf_areas.area / 1000000
238
239
    # Exclude areas smaller than X m². X was calculated as the area of
240
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
241
    min_area = 4 * (0.126**2) * np.sqrt(3)
242
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
243
244
    # Find the centroid of all the suitable potential areas
245
    wf_areas["centroid"] = wf_areas.centroid
246
247
    # find the potential areas that intersects the convex hulls of current
248
    # wind farms and assign voltage levels
249
    wf_areas = wf_areas.to_crs(4326)
250
    for i in wf_areas.index:
251
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
252
        if intersection.any() == False:
253
            wf_areas.at[i, "voltage"] = "No Intersection"
254
        else:
255
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
256
257
    # wf_areas_ni has the potential areas which don't intersect any current
258
    # wind farm
259
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
260
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
261
    return wf_areas, wf_areas_ni
262
263
264
def wind_power_states(
265
    state_wf,
266
    state_wf_ni,
267
    state_mv_districts,
268
    target_power,
269
    scenario_year,
270
    source,
271
    fed_state,
272
):
273
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
274
    database.
275
276
    Parameters
277
    ----------
278
    state_wf: geodataframe, mandatory
279
        gdf containing all the wf in the state created based on existing wf.
280
    state_wf_ni: geodataframe, mandatory
281
        potential areas in the the state wich don't intersect any existing wf
282
    state_mv_districts: geodataframe, mandatory
283
        gdf containing all the MV/HV substations in the state
284
    target_power: int, mandatory
285
        Objective power for a state given in MW
286
    scenario_year: str, mandatory
287
        name of the scenario
288
    source: str, mandatory
289
        Type of energy genetor. Always "Wind_onshore" for this script.
290
    fed_state: str, mandatory
291
        Name of the state where the wind farms will be allocated
292
293
    """
294
295
    def match_district_se(x):
296
        for sub in hvmv_substation.index:
297
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
298
                return hvmv_substation.at[sub, "point"]
299
300
    con = db.engine()
301
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
302
    # hvmv_substation has the information about HV transmission lines in
303
    # Germany
304
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
305
306
    # Set wind potential depending on geographical location
307
    power_north = 21.05  # MW/km²
308
    power_south = 16.81  # MW/km²
309
    # Set a maximum installed capacity to limit the power of big potential
310
    # areas
311
    max_power_hv = 120  # in MW
312
    max_power_mv = 20  # in MW
313
    # Max distance between WF (connected to MV) and nearest HV substation
314
    # that allows its connection to HV.
315
    max_dist_hv = 20000  # in meters
316
317
    summary = pd.DataFrame(
318
        columns=["state", "target", "from existin WF", "MV districts"]
319
    )
320
321
    north = [
322
        "Schleswig-Holstein",
323
        "Mecklenburg-Vorpommern",
324
        "Niedersachsen",
325
        "Bremen",
326
        "Hamburg",
327
    ]
328
329
    if fed_state == "DE":
330
        sql = f"""SELECT * FROM boundaries.vg250_lan
331
        WHERE gen in {tuple(north)}
332
        """
333
        north_states = gpd.GeoDataFrame.from_postgis(
334
            sql, con, geom_col="geometry"
335
        )
336
        north_states.to_crs(3035, inplace=True)
337
        state_wf["nord"] = state_wf.within(north_states.unary_union)
338
        state_wf["inst capacity [MW]"] = state_wf.apply(
339
            lambda x: (
340
                power_north * x["area [km²]"]
341
                if x["nord"]
342
                else power_south * x["area [km²]"]),
343
            axis=1,
344
        )
345
    else:
346
        if fed_state in north:
347
            state_wf["inst capacity [MW]"] = (
348
                power_north * state_wf["area [km²]"]
349
            )
350
        else:
351
            state_wf["inst capacity [MW]"] = (
352
                power_south * state_wf["area [km²]"]
353
            )
354
355
    # Divide selected areas based on voltage of connection points
356
    wf_mv = state_wf[
357
        (state_wf["voltage"] != "Hochspannung")
358
        & (state_wf["voltage"] != "Hoechstspannung")
359
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
360
    ]
361
362
    wf_hv = state_wf[
363
        (state_wf["voltage"] == "Hochspannung")
364
        | (state_wf["voltage"] == "Hoechstspannung")
365
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
366
    ]
367
368
    # Wind farms connected to MV network will be connected to HV network if
369
    # the distance to the closest HV substation is =< max_dist_hv, and the
370
    # installed capacity is bigger than max_power_mv
371
    hvmv_substation = hvmv_substation.to_crs(3035)
372
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
373
        lambda x: int(x.split(";")[0])
374
    )
375
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
376
    hv_substations = hv_substations.unary_union  # join all the hv_substations
377
    wf_mv["dist_to_HV"] = (
378
        state_wf["geom"].to_crs(3035).distance(hv_substations)
379
    )
380
    wf_mv_to_hv = wf_mv[
381
        (wf_mv["dist_to_HV"] <= max_dist_hv)
382
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
383
    ]
384
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
385
    wf_mv_to_hv["voltage"] = "Hochspannung"
386
387
    wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
388
    wf_mv = wf_mv[
389
        (wf_mv["dist_to_HV"] > max_dist_hv)
390
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
391
    ]
392
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])
393
394
    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
395
        lambda x: x if x < max_power_hv else max_power_hv
396
    )
397
398
    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
399
        lambda x: x if x < max_power_mv else max_power_mv
400
    )
401
402
    wind_farms = pd.concat([wf_hv, wf_mv])
403
404
    # Adjust the total installed capacity to the scenario
405
    total_wind_power = (
406
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
407
    )
408
409
    if total_wind_power > target_power:
410
        scale_factor = target_power / total_wind_power
411
        wf_mv["inst capacity [MW]"] = (
412
            wf_mv["inst capacity [MW]"] * scale_factor
413
        )
414
        wf_hv["inst capacity [MW]"] = (
415
            wf_hv["inst capacity [MW]"] * scale_factor
416
        )
417
        wind_farms = pd.concat([wf_hv, wf_mv])
418
        summary = pd.concat(
419
            [
420
                summary,
421
                pd.DataFrame(
422
                    index=[summary.index.max() + 1],
423
                    data={
424
                        "state": fed_state,
425
                        "target": target_power,
426
                        "from existin WF": wind_farms[
427
                            "inst capacity [MW]"
428
                        ].sum(),
429
                        "MV districts": 0,
430
                    },
431
                ),
432
            ],
433
            ignore_index=True,
434
        )
435
    else:
436
        extra_wf = state_mv_districts.copy()
437
        extra_wf = extra_wf.set_geometry("geom")
438
        extra_wf["area [km²]"] = 0.0
439
        for district in extra_wf.index:
440
            try:
441
                pot_area_district = gpd.clip(
442
                    state_wf_ni, extra_wf.at[district, "geom"]
443
                )
444
                extra_wf.at[district, "area [km²]"] = pot_area_district[
445
                    "area [km²]"
446
                ].sum()
447
            except:
448
                print(district)
449
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
450
        total_new_area = extra_wf["area [km²]"].sum()
451
        scale_factor = (target_power - total_wind_power) / total_new_area
452
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
453
        extra_wf["voltage"] = "Hochspannung"
454
        summary = pd.concat(
455
            [
456
                summary,
457
                pd.DataFrame(
458
                    index=[summary.index.max() + 1],
459
                    data={
460
                        "state": fed_state,
461
                        "target": target_power,
462
                        "from existin WF": wind_farms[
463
                            "inst capacity [MW]"
464
                        ].sum(),
465
                        "MV districts": extra_wf["inst capacity [MW]"].sum(),
466
                    },
467
                ),
468
            ],
469
            ignore_index=True,
470
        )
471
        extra_wf.to_crs(4326, inplace=True)
472
        wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)
473
474
    # Use Definition of thresholds for voltage level assignment
475
    wind_farms["voltage_level"] = 0
476
    for i in wind_farms.index:
477
        try:
478
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
479
                wind_farms.at[i, "voltage_level"] = 5
480
                continue
481
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
482
                wind_farms.at[i, "voltage_level"] = 4
483
                continue
484
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
485
                wind_farms.at[i, "voltage_level"] = 3
486
                continue
487
        except:
488
            print(i)
489
490
    # Look for the maximum id in the table egon_power_plants
491
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
492
    max_id = pd.read_sql(sql, con)
493
    max_id = max_id["max"].iat[0]
494
    if max_id is None:
495
        wind_farm_id = 1
496
    else:
497
        wind_farm_id = int(max_id + 1)
498
499
    # write_table in egon-data database:
500
501
    # Copy relevant columns from wind_farms
502
    insert_wind_farms = wind_farms[
503
        ["inst capacity [MW]", "voltage_level", "centroid"]
504
    ]
505
506
    # Set static column values
507
    insert_wind_farms["carrier"] = source
508
    insert_wind_farms["scenario"] = scenario_year
509
510
    # Change name and crs of geometry column
511
    insert_wind_farms = (
512
        insert_wind_farms.rename(
513
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
514
        )
515
        .set_geometry("geom")
516
        .to_crs(4326)
517
    )
518
519
    # Reset index
520
    insert_wind_farms.index = pd.RangeIndex(
521
        start=wind_farm_id,
522
        stop=wind_farm_id + len(insert_wind_farms),
523
        name="id",
524
    )
525
526
    # Delete old wind_onshore generators
527
    db.execute_sql(
528
        f"""DELETE FROM supply.egon_power_plants
529
        WHERE carrier = 'wind_onshore'
530
        AND scenario = '{scenario_year}'
531
        """
532
    )
533
534
    # Insert into database
535
    insert_wind_farms.reset_index().to_postgis(
536
        "egon_power_plants",
537
        schema="supply",
538
        con=db.engine(),
539
        if_exists="append",
540
    )
541
    return wind_farms, summary
542
543
544
def generate_map():
545
    """Generates a map with the position of all the wind farms
546
547
    Parameters
548
    ----------
549
    *No parameters required
550
551
    """
552
    con = db.engine()
553
554
    # Import wind farms from egon-data
555
    sql = (
556
        "SELECT  carrier, el_capacity, geom, scenario FROM "
557
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
558
    )
559
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
560
        sql, con, geom_col="geom", crs=4326
561
    )
562
    wind_farms_t = wind_farms_t.to_crs(3035)
563
564
    for scenario in wind_farms_t.scenario.unique():
565
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
566
        # mv_districts has geographic info of medium voltage districts in
567
        # Germany
568
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
569
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
570
        mv_districts = mv_districts.to_crs(3035)
571
572
        mv_districts["power"] = 0.0
573
        for std in mv_districts.index:
574
            try:
575
                mv_districts.at[std, "power"] = gpd.clip(
576
                    wind_farms, mv_districts.at[std, "geom"]
577
                ).el_capacity.sum()
578
            except:
579
                print(std)
580
581
        fig, ax = plt.subplots(1, 1)
582
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
583
        mv_districts.plot(
584
            ax=ax,
585
            column="power",
586
            cmap="magma_r",
587
            legend=True,
588
            legend_kwds={
589
                "label": "Installed capacity in MW",
590
                "orientation": "vertical",
591
            },
592
        )
593
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
594
    return 0
595