Passed
Pull Request — dev (#1197)
by
unknown
05:18
created

data.datasets.power_plants.wind_farms   B

Complexity

Total Complexity 43

Size/Duplication

Total Lines 592
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 43
eloc 352
dl 0
loc 592
rs 8.96
c 0
b 0
f 0

4 Functions

Rating   Name   Duplication   Size   Complexity  
B insert() 0 113 6
F wind_power_states() 0 278 21
A generate_map() 0 51 4
D generate_wind_farms() 0 132 12

How to fix   Complexity   

Complexity

Complex classes like data.datasets.power_plants.wind_farms often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from matplotlib import pyplot as plt
2
from shapely.geometry import MultiPoint, Point
3
import geopandas as gpd
4
import numpy as np
5
import pandas as pd
6
7
from egon.data import db
8
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
9
import egon.data.config
10
11
12
def insert():
13
    """Main function. Import power objectives generate results calling the
14
    functions "generate_wind_farms" and  "wind_power_states".
15
16
    Parameters
17
    ----------
18
    *No parameters required
19
20
    """
21
22
    con = db.engine()
23
24
    # federal_std has the shapes of the German states
25
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
26
    federal_std = gpd.GeoDataFrame.from_postgis(
27
        sql, con, geom_col="geometry", crs=4326
28
    )
29
30
    # target_power_df has the expected capacity of each federal state
31
    sql = (
32
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
33
        "supply.egon_scenario_capacities"
34
    )
35
    target_power_df = pd.read_sql(sql, con)
36
37
    # mv_districts has geographic info of medium voltage districts in Germany
38
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
39
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
40
41
    # Delete all the water bodies from the federal states shapes
42
    federal_std = federal_std[federal_std["gf"] == 4]
43
    federal_std.drop(columns=["gf"], inplace=True)
44
    # Filter the potential expected from wind_onshore
45
    target_power_df = target_power_df[
46
        target_power_df["carrier"] == "wind_onshore"
47
    ]
48
    target_power_df.set_index("nuts", inplace=True)
49
    target_power_df["geom"] = Point(0, 0)
50
51
    # Join the geometries which belong to the same states
52
    for std in target_power_df.index:
53
        df = federal_std[federal_std["nuts"] == std]
54
        if df.size > 0:
55
            target_power_df.at[std, "name"] = df["gen"].iat[0]
56
        else:
57
            target_power_df.at[std, "name"] = np.nan
58
        target_power_df.at[std, "geom"] = df.unary_union
59
    target_power_df = gpd.GeoDataFrame(
60
        target_power_df, geometry="geom", crs=4326
61
    )
62
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
63
    target_power_df = target_power_df.to_crs(3035)
64
65
    # Create the shape for full Germany
66
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
67
    target_power_df.at["DE", "name"] = "Germany"
68
    # Generate WFs for Germany based on potential areas and existing WFs
69
    wf_areas, wf_areas_ni = generate_wind_farms()
70
71
    # Change the columns "geometry" of this GeoDataFrames
72
    wf_areas.set_geometry("centroid", inplace=True)
73
    wf_areas_ni.set_geometry("centroid", inplace=True)
74
75
    # Create centroids of mv_districts to apply the clip function
76
    mv_districts["centroid"] = mv_districts.centroid
77
    mv_districts.set_geometry("centroid", inplace=True)
78
79
    summary_t = pd.DataFrame()
80
    farms = pd.DataFrame()
81
82
    if "eGon100RE" in target_power_df["scenario_name"].values:
83
        wind_farms_state, summary_state = wind_power_states(
84
            wf_areas,
85
            wf_areas_ni,
86
            mv_districts,
87
            target_power_df["capacity"].values[0],
88
            "eGon100RE",
89
            "wind_onshore",
90
            "DE",
91
        )
92
        target_power_df = target_power_df[
93
            target_power_df["scenario_name"] != "eGon100RE"
94
        ]
95
96
    if "eGon2035" in target_power_df["scenario_name"].values:
97
        # Fit wind farms scenarions for each one of the states
98
        for bundesland in target_power_df.index:
99
            state_wf = gpd.clip(wf_areas, target_power_df.at[bundesland, "geom"])
100
            state_wf_ni = gpd.clip(
101
                wf_areas_ni, target_power_df.at[bundesland, "geom"]
102
            )
103
            state_mv_districts = gpd.clip(
104
                mv_districts, target_power_df.at[bundesland, "geom"]
105
            )
106
            target_power = target_power_df.at[bundesland, "capacity"]
107
            scenario_year = target_power_df.at[bundesland, "scenario_name"]
108
            source = target_power_df.at[bundesland, "carrier"]
109
            fed_state = target_power_df.at[bundesland, "name"]
110
            wind_farms_state, summary_state = wind_power_states(
111
                state_wf,
112
                state_wf_ni,
113
                state_mv_districts,
114
                target_power,
115
                scenario_year,
116
                source,
117
                fed_state,
118
            )
119
            summary_t = pd.concat([summary_t, summary_state])
120
            farms = pd.concat([farms, wind_farms_state])
121
122
    generate_map()
123
124
    return
125
126
127
def generate_wind_farms():
128
    """Generate wind farms based on existing wind farms.
129
130
    Parameters
131
    ----------
132
    *No parameters required
133
134
    """
135
    # get config
136
    cfg = egon.data.config.datasets()["power_plants"]
137
138
    # Due to typos in some inputs, some areas of existing wind farms
139
    # should be discarded using perimeter and area filters
140
    def filter_current_wf(wf_geometry):
141
        if wf_geometry.geom_type == "Point":
142
            return True
143
        if wf_geometry.geom_type == "Polygon":
144
            area = wf_geometry.area
145
            length = wf_geometry.length
146
            # Filter based on the biggest (# of WT) wind farm
147
            return (area < 40000000) & (length < 40000)
148
        if wf_geometry.geom_type == "LineString":
149
            length = wf_geometry.length
150
            return length < 1008  # 8 * rotor diameter (8*126m)
151
152
    # The function 'wind_farm' returns the connection point of a wind turbine
153
    def wind_farm(x):
154
        try:
155
            return map_ap_wea_farm[x]
156
        except:
157
            return np.nan
158
159
    # The function 'voltage' returns the voltage level a wind turbine operates
160
    def voltage(x):
161
        try:
162
            return map_ap_wea_voltage[x]
163
        except:
164
            return np.nan
165
166
    # Connect to the data base
167
    con = db.engine()
168
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
169
    # wf_areas has all the potential areas geometries for wind farms
170
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
171
    # bus has the connection points of the wind farms
172
    bus = pd.read_csv(
173
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
174
        index_col="MaStRNummer",
175
    )
176
    # Drop all the rows without connection point
177
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
178
    # wea has info of each wind turbine in Germany.
179
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
180
181
    # Delete all the rows without information about geographical location
182
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
183
    # Delete all the offshore wind turbines
184
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
185
    # the variable map_ap_wea_farm have the connection point of all the
186
    # available wt in the dataframe bus.
187
    map_ap_wea_farm = {}
188
    map_ap_wea_voltage = {}
189
190
    wea["connection point"] = wea["LokationMastrNummer"].map(
191
        bus["NetzanschlusspunktMastrNummer"]
192
    )
193
    wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])
194
195
    # Create the columns 'geometry' which will have location of each WT in a
196
    # point type
197
    wea = gpd.GeoDataFrame(
198
        wea,
199
        geometry=gpd.points_from_xy(
200
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
201
        ),
202
    )
203
204
    # wf_size storages the number of WT connected to each connection point
205
    wf_size = wea["connection point"].value_counts()
206
    # Delete all the connection points with less than 3 WT
207
    wf_size = wf_size[wf_size >= 3]
208
    # Filter all the WT which are not part of a wind farm of at least 3 WT
209
    wea = wea[wea["connection point"].isin(wf_size.index)]
210
    # current_wfs has all the geometries that represent the existing wind
211
    # farms
212
    current_wfs = gpd.GeoDataFrame(
213
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
214
    )
215
    for conn_point, wt_location in wea.groupby("connection point"):
216
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
217
            wt_location["geometry"].values
218
        ).convex_hull
219
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
220
221
    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
222
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
223
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
224
    # The 'filter_wts' is used to discard atypical values for the current wind
225
    # farms
226
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
227
        lambda x: filter_current_wf(x)
228
    )
229
230
    # Apply the filter based on area and perimeter
231
    current_wfs = current_wfs[current_wfs["filter2"]]
232
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
233
234
    wf_areas["area [km²]"] = wf_areas.area / 1000000
235
236
    # Exclude areas smaller than X m². X was calculated as the area of
237
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
238
    min_area = 4 * (0.126**2) * np.sqrt(3)
239
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
240
241
    # Find the centroid of all the suitable potential areas
242
    wf_areas["centroid"] = wf_areas.centroid
243
244
    # find the potential areas that intersects the convex hulls of current
245
    # wind farms and assign voltage levels
246
    wf_areas = wf_areas.to_crs(4326)
247
    for i in wf_areas.index:
248
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
249
        if intersection.any() == False:
250
            wf_areas.at[i, "voltage"] = "No Intersection"
251
        else:
252
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
253
254
    # wf_areas_ni has the potential areas which don't intersect any current
255
    # wind farm
256
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
257
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
258
    return wf_areas, wf_areas_ni
259
260
261
def wind_power_states(
262
    state_wf,
263
    state_wf_ni,
264
    state_mv_districts,
265
    target_power,
266
    scenario_year,
267
    source,
268
    fed_state,
269
):
270
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
271
    database.
272
273
    Parameters
274
    ----------
275
    state_wf: geodataframe, mandatory
276
        gdf containing all the wf in the state created based on existing wf.
277
    state_wf_ni: geodataframe, mandatory
278
        potential areas in the the state wich don't intersect any existing wf
279
    state_mv_districts: geodataframe, mandatory
280
        gdf containing all the MV/HV substations in the state
281
    target_power: int, mandatory
282
        Objective power for a state given in MW
283
    scenario_year: str, mandatory
284
        name of the scenario
285
    source: str, mandatory
286
        Type of energy genetor. Always "Wind_onshore" for this script.
287
    fed_state: str, mandatory
288
        Name of the state where the wind farms will be allocated
289
290
    """
291
292
    def match_district_se(x):
293
        for sub in hvmv_substation.index:
294
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
295
                return hvmv_substation.at[sub, "point"]
296
297
    con = db.engine()
298
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
299
    # hvmv_substation has the information about HV transmission lines in
300
    # Germany
301
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
302
303
    # Set wind potential depending on geographical location
304
    power_north = 21.05  # MW/km²
305
    power_south = 16.81  # MW/km²
306
    # Set a maximum installed capacity to limit the power of big potential
307
    # areas
308
    max_power_hv = 120  # in MW
309
    max_power_mv = 20  # in MW
310
    # Max distance between WF (connected to MV) and nearest HV substation
311
    # that allows its connection to HV.
312
    max_dist_hv = 20000  # in meters
313
314
    summary = pd.DataFrame(
315
        columns=["state", "target", "from existin WF", "MV districts"]
316
    )
317
318
    north = [
319
        "Schleswig-Holstein",
320
        "Mecklenburg-Vorpommern",
321
        "Niedersachsen",
322
        "Bremen",
323
        "Hamburg",
324
    ]
325
326
    if fed_state == "DE":
327
        sql = f"""SELECT * FROM boundaries.vg250_lan
328
        WHERE gen in {tuple(north)}
329
        """
330
        north_states = gpd.GeoDataFrame.from_postgis(
331
            sql, con, geom_col="geometry"
332
        )
333
        north_states.to_crs(3035, inplace=True)
334
        state_wf["nord"] = state_wf.within(north_states.unary_union)
335
        state_wf["inst capacity [MW]"] = state_wf.apply(
336
            lambda x: (
337
                power_north * x["area [km²]"]
338
                if x["nord"]
339
                else power_south * x["area [km²]"]),
340
            axis=1,
341
        )
342
    else:
343
        if fed_state in north:
344
            state_wf["inst capacity [MW]"] = (
345
                power_north * state_wf["area [km²]"]
346
            )
347
        else:
348
            state_wf["inst capacity [MW]"] = (
349
                power_south * state_wf["area [km²]"]
350
            )
351
352
    # Divide selected areas based on voltage of connection points
353
    wf_mv = state_wf[
354
        (state_wf["voltage"] != "Hochspannung")
355
        & (state_wf["voltage"] != "Hoechstspannung")
356
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
357
    ]
358
359
    wf_hv = state_wf[
360
        (state_wf["voltage"] == "Hochspannung")
361
        | (state_wf["voltage"] == "Hoechstspannung")
362
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
363
    ]
364
365
    # Wind farms connected to MV network will be connected to HV network if
366
    # the distance to the closest HV substation is =< max_dist_hv, and the
367
    # installed capacity is bigger than max_power_mv
368
    hvmv_substation = hvmv_substation.to_crs(3035)
369
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
370
        lambda x: int(x.split(";")[0])
371
    )
372
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
373
    hv_substations = hv_substations.unary_union  # join all the hv_substations
374
    wf_mv["dist_to_HV"] = (
375
        state_wf["geom"].to_crs(3035).distance(hv_substations)
376
    )
377
    wf_mv_to_hv = wf_mv[
378
        (wf_mv["dist_to_HV"] <= max_dist_hv)
379
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
380
    ]
381
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
382
    wf_mv_to_hv["voltage"] = "Hochspannung"
383
384
    wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
385
    wf_mv = wf_mv[
386
        (wf_mv["dist_to_HV"] > max_dist_hv)
387
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
388
    ]
389
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])
390
391
    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
392
        lambda x: x if x < max_power_hv else max_power_hv
393
    )
394
395
    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
396
        lambda x: x if x < max_power_mv else max_power_mv
397
    )
398
399
    wind_farms = pd.concat([wf_hv, wf_mv])
400
401
    # Adjust the total installed capacity to the scenario
402
    total_wind_power = (
403
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
404
    )
405
406
    if total_wind_power > target_power:
407
        scale_factor = target_power / total_wind_power
408
        wf_mv["inst capacity [MW]"] = (
409
            wf_mv["inst capacity [MW]"] * scale_factor
410
        )
411
        wf_hv["inst capacity [MW]"] = (
412
            wf_hv["inst capacity [MW]"] * scale_factor
413
        )
414
        wind_farms = pd.concat([wf_hv, wf_mv])
415
        summary = pd.concat(
416
            [
417
                summary,
418
                pd.DataFrame(
419
                    index=[summary.index.max() + 1],
420
                    data={
421
                        "state": fed_state,
422
                        "target": target_power,
423
                        "from existin WF": wind_farms[
424
                            "inst capacity [MW]"
425
                        ].sum(),
426
                        "MV districts": 0,
427
                    },
428
                ),
429
            ],
430
            ignore_index=True,
431
        )
432
    else:
433
        extra_wf = state_mv_districts.copy()
434
        extra_wf = extra_wf.set_geometry("geom")
435
        extra_wf["area [km²]"] = 0.0
436
        for district in extra_wf.index:
437
            try:
438
                pot_area_district = gpd.clip(
439
                    state_wf_ni, extra_wf.at[district, "geom"]
440
                )
441
                extra_wf.at[district, "area [km²]"] = pot_area_district[
442
                    "area [km²]"
443
                ].sum()
444
            except:
445
                print(district)
446
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
447
        total_new_area = extra_wf["area [km²]"].sum()
448
        scale_factor = (target_power - total_wind_power) / total_new_area
449
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
450
        extra_wf["voltage"] = "Hochspannung"
451
        summary = pd.concat(
452
            [
453
                summary,
454
                pd.DataFrame(
455
                    index=[summary.index.max() + 1],
456
                    data={
457
                        "state": fed_state,
458
                        "target": target_power,
459
                        "from existin WF": wind_farms[
460
                            "inst capacity [MW]"
461
                        ].sum(),
462
                        "MV districts": extra_wf["inst capacity [MW]"].sum(),
463
                    },
464
                ),
465
            ],
466
            ignore_index=True,
467
        )
468
        extra_wf.to_crs(4326, inplace=True)
469
        wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)
470
471
    # Use Definition of thresholds for voltage level assignment
472
    wind_farms["voltage_level"] = 0
473
    for i in wind_farms.index:
474
        try:
475
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
476
                wind_farms.at[i, "voltage_level"] = 5
477
                continue
478
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
479
                wind_farms.at[i, "voltage_level"] = 4
480
                continue
481
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
482
                wind_farms.at[i, "voltage_level"] = 3
483
                continue
484
        except:
485
            print(i)
486
487
    # Look for the maximum id in the table egon_power_plants
488
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
489
    max_id = pd.read_sql(sql, con)
490
    max_id = max_id["max"].iat[0]
491
    if max_id is None:
492
        wind_farm_id = 1
493
    else:
494
        wind_farm_id = int(max_id + 1)
495
496
    # write_table in egon-data database:
497
498
    # Copy relevant columns from wind_farms
499
    insert_wind_farms = wind_farms[
500
        ["inst capacity [MW]", "voltage_level", "centroid"]
501
    ]
502
503
    # Set static column values
504
    insert_wind_farms["carrier"] = source
505
    insert_wind_farms["scenario"] = scenario_year
506
507
    # Change name and crs of geometry column
508
    insert_wind_farms = (
509
        insert_wind_farms.rename(
510
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
511
        )
512
        .set_geometry("geom")
513
        .to_crs(4326)
514
    )
515
516
    # Reset index
517
    insert_wind_farms.index = pd.RangeIndex(
518
        start=wind_farm_id,
519
        stop=wind_farm_id + len(insert_wind_farms),
520
        name="id",
521
    )
522
523
    # Delete old wind_onshore generators
524
    db.execute_sql(
525
        f"""DELETE FROM supply.egon_power_plants
526
        WHERE carrier = 'wind_onshore'
527
        AND scenario = '{scenario_year}'
528
        """
529
    )
530
531
    # Insert into database
532
    insert_wind_farms.reset_index().to_postgis(
533
        "egon_power_plants",
534
        schema="supply",
535
        con=db.engine(),
536
        if_exists="append",
537
    )
538
    return wind_farms, summary
539
540
541
def generate_map():
542
    """Generates a map with the position of all the wind farms
543
544
    Parameters
545
    ----------
546
    *No parameters required
547
548
    """
549
    con = db.engine()
550
551
    # Import wind farms from egon-data
552
    sql = (
553
        "SELECT  carrier, el_capacity, geom, scenario FROM "
554
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
555
    )
556
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
557
        sql, con, geom_col="geom", crs=4326
558
    )
559
    wind_farms_t = wind_farms_t.to_crs(3035)
560
561
    for scenario in wind_farms_t.scenario.unique():
562
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
563
        # mv_districts has geographic info of medium voltage districts in
564
        # Germany
565
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
566
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
567
        mv_districts = mv_districts.to_crs(3035)
568
569
        mv_districts["power"] = 0.0
570
        for std in mv_districts.index:
571
            try:
572
                mv_districts.at[std, "power"] = gpd.clip(
573
                    wind_farms, mv_districts.at[std, "geom"]
574
                ).el_capacity.sum()
575
            except:
576
                print(std)
577
578
        fig, ax = plt.subplots(1, 1)
579
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
580
        mv_districts.plot(
581
            ax=ax,
582
            column="power",
583
            cmap="magma_r",
584
            legend=True,
585
            legend_kwds={
586
                "label": "Installed capacity in MW",
587
                "orientation": "vertical",
588
            },
589
        )
590
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
591
    return 0
592