Passed
Pull Request — dev (#1053)
by
unknown
01:34
created

data.datasets.power_plants.wind_farms   A

Complexity

Total Complexity 38

Size/Duplication

Total Lines 517
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 38
eloc 309
dl 0
loc 517
rs 9.36
c 0
b 0
f 0

4 Functions

Rating   Name   Duplication   Size   Complexity  
B insert() 0 95 4
F wind_power_states() 0 232 18
A generate_map() 0 48 4
D generate_wind_farms() 0 125 12
1
from matplotlib import pyplot as plt
2
from shapely.geometry import LineString, MultiPoint, Point, Polygon
3
import geopandas as gpd
4
import numpy as np
5
import pandas as pd
6
7
import egon.data.config
8
from egon.data import db
9
10
11
def insert():
12
    """Main function. Import power objectives generate results calling the
13
    functions "generate_wind_farms" and  "wind_power_states".
14
15
    Parameters
16
    ----------
17
    *No parameters required
18
19
    """
20
21
    con = db.engine()
22
23
    # federal_std has the shapes of the German states
24
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
25
    federal_std = gpd.GeoDataFrame.from_postgis(
26
        sql, con, geom_col="geometry", crs=4326
27
    )
28
29
    # target_power_df has the expected capacity of each federal state
30
    sql = "SELECT  carrier, capacity, nuts, scenario_name FROM supply.egon_scenario_capacities"
31
    target_power_df = pd.read_sql(sql, con)
32
33
    # mv_districts has geographic info of medium voltage districts in Germany
34
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
35
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
36
37
    # Delete all the water bodies from the federal states shapes
38
    federal_std = federal_std[federal_std["gf"] == 4]
39
    federal_std.drop(columns=["gf"], inplace=True)
40
    # Filter the potential expected from wind_onshore
41
    target_power_df = target_power_df[
42
        target_power_df["carrier"] == "wind_onshore"
43
    ]
44
    target_power_df.set_index("nuts", inplace=True)
45
    target_power_df["geom"] = Point(0, 0)
46
47
    # Join the geometries which belong to the same states
48
    for std in target_power_df.index:
49
        df = federal_std[federal_std["nuts"] == std]
50
        if df.size > 0:
51
            target_power_df.at[std, "name"] = df["gen"].iat[0]
52
        else:
53
            target_power_df.at[std, "name"] = np.nan
54
        target_power_df.at[std, "geom"] = df.unary_union
55
    target_power_df = gpd.GeoDataFrame(
56
        target_power_df, geometry="geom", crs=4326
57
    )
58
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
59
    target_power_df = target_power_df.to_crs(3035)
60
61
    #Create the shape for full Germany
62
    target_power_df.at['DE', 'geom'] = target_power_df['geom'].unary_union
63
    target_power_df.at['DE', 'name'] = 'Germany'
64
    # Generate WFs for Germany based on potential areas and existing WFs
65
    wf_areas, wf_areas_ni = generate_wind_farms()
66
67
    # Change the columns "geometry" of this GeoDataFrames
68
    wf_areas.set_geometry("centroid", inplace=True)
69
    wf_areas_ni.set_geometry("centroid", inplace=True)
70
71
    # Create centroids of mv_districts to apply the clip function
72
    mv_districts["centroid"] = mv_districts.centroid
73
    mv_districts.set_geometry("centroid", inplace=True)
74
75
    summary_t = pd.DataFrame()
76
    farms = pd.DataFrame()
77
78
    # Fit wind farms scenarions for each one of the states
79
    for scenario in target_power_df.index:
80
        state_wf = gpd.clip(wf_areas, target_power_df.at[scenario, "geom"])
81
        state_wf_ni = gpd.clip(
82
            wf_areas_ni, target_power_df.at[scenario, "geom"]
83
        )
84
        state_mv_districts = gpd.clip(
85
            mv_districts, target_power_df.at[scenario, "geom"]
86
        )
87
        target_power = target_power_df.at[scenario, "capacity"]
88
        scenario_year = target_power_df.at[scenario, "scenario_name"]
89
        source = target_power_df.at[scenario, "carrier"]
90
        fed_state = target_power_df.at[scenario, "name"]
91
        wind_farms_state, summary_state = wind_power_states(
92
            state_wf,
93
            state_wf_ni,
94
            state_mv_districts,
95
            target_power,
96
            scenario_year,
97
            source,
98
            fed_state,
99
        )
100
        summary_t = summary_t.append(summary_state)
101
        farms = farms.append(wind_farms_state)
102
103
    generate_map()
104
105
    return (summary_t, farms)
106
107
108
def generate_wind_farms():
109
    """Generate wind farms based on existing wind farms.
110
111
    Parameters
112
    ----------
113
    *No parameters required
114
115
    """
116
    # get config
117
    cfg = egon.data.config.datasets()["power_plants"]
118
119
    # Due to typos in some inputs, some areas of existing wind farms
120
    # should be discarded using perimeter and area filters
121
    def filter_current_wf(wf_geometry):
122
        if wf_geometry.geom_type == "Point":
123
            return True
124
        if wf_geometry.geom_type == "Polygon":
125
            area = wf_geometry.area
126
            length = wf_geometry.length
127
            # Filter based on the biggest (# of WT) wind farm
128
            return (area < 40000000) & (length < 40000)
129
        if wf_geometry.geom_type == "LineString":
130
            length = wf_geometry.length
131
            return length < 1008  # 8 * rotor diameter (8*126m)
132
133
    # The function 'wind_farm' returns the connection point of a wind turbine
134
    def wind_farm(x):
135
        try:
136
            return map_ap_wea_farm[x]
137
        except:
138
            return np.nan
139
140
    # The function 'voltage' returns the voltage level a wind turbine operates
141
    def voltage(x):
142
        try:
143
            return map_ap_wea_voltage[x]
144
        except:
145
            return np.nan
146
147
    # Connect to the data base
148
    con = db.engine()
149
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
150
    # wf_areas has all the potential areas geometries for wind farms
151
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
152
    # bus has the connection points of the wind farms
153
    bus = pd.read_csv(cfg["sources"]["mastr_location"], index_col= "MaStRNummer")
154
    # Drop all the rows without connection point
155
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
156
    # wea has info of each wind turbine in Germany.
157
    wea = pd.read_csv(cfg["sources"]["mastr_wind"])
158
159
    # Delete all the rows without information about geographical location
160
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
161
    # Delete all the offshore wind turbines
162
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
163
    # the variable map_ap_wea_farm have the connection point of all the available wt
164
    # in the dataframe bus.
165
    map_ap_wea_farm = {}
166
    map_ap_wea_voltage = {}
167
168
    wea["connection point"] = wea["LokationMastrNummer"].map(
169
        bus["NetzanschlusspunktMastrNummer"])
170
    wea["voltage"] = wea["LokationMastrNummer"].map(
171
        bus["NetzanschlusspunktMastrNummer"])
172
173
    # Create the columns 'geometry' which will have location of each WT in a point type
174
    wea = gpd.GeoDataFrame(
175
        wea,
176
        geometry=gpd.points_from_xy(
177
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
178
        ),
179
    )
180
181
    # wf_size storages the number of WT connected to each connection point
182
    wf_size = wea["connection point"].value_counts()
183
    # Delete all the connection points with less than 3 WT
184
    wf_size = wf_size[wf_size >= 3]
185
    # Filter all the WT which are not part of a wind farm of at least 3 WT
186
    wea = wea[wea["connection point"].isin(wf_size.index)]
187
    # current_wfs has all the geometries that represent the existing wind farms
188
    current_wfs = gpd.GeoDataFrame(
189
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
190
    )
191
    for conn_point, wt_location in wea.groupby("connection point"):
192
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
193
            wt_location["geometry"].values
194
        ).convex_hull
195
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
196
197
    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
198
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
199
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
200
    # The 'filter_wts' is used to discard atypical values for the current wind farms
201
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
202
        lambda x: filter_current_wf(x)
203
    )
204
205
    # Apply the filter based on area and perimeter
206
    current_wfs = current_wfs[current_wfs["filter2"]]
207
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
208
209
    wf_areas["area [km²]"] = wf_areas.area / 1000000
210
211
    # Exclude areas smaller than X m². X was calculated as the area of
212
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
213
    min_area = 4 * (0.126 ** 2) * np.sqrt(3)
214
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
215
216
    # Find the centroid of all the suitable potential areas
217
    wf_areas["centroid"] = wf_areas.centroid
218
219
    # find the potential areas that intersects the convex hulls of current wind farms
220
    # and assign voltage levels
221
    wf_areas = wf_areas.to_crs(4326)
222
    for i in wf_areas.index:
223
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
224
        if intersection.any() == False:
225
            wf_areas.at[i, "voltage"] = "No Intersection"
226
        else:
227
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
228
229
    # wf_areas_ni has the potential areas which don't intersect any current wind farm
230
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
231
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
232
    return wf_areas, wf_areas_ni
233
234
235
def wind_power_states(
236
    state_wf,
237
    state_wf_ni,
238
    state_mv_districts,
239
    target_power,
240
    scenario_year,
241
    source,
242
    fed_state,
243
):
244
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database.
245
246
    Parameters
247
    ----------
248
    state_wf: geodataframe, mandatory
249
        gdf containing all the wf in the state created based on existing wf.
250
    state_wf_ni: geodataframe, mandatory
251
        potential areas in the the state wich don't intersect any existing wf
252
    state_mv_districts: geodataframe, mandatory
253
        gdf containing all the MV/HV substations in the state
254
    target_power: int, mandatory
255
        Objective power for a state given in MW
256
    scenario_year: str, mandatory
257
        name of the scenario
258
    source: str, mandatory
259
        Type of energy genetor. Always "Wind_onshore" for this script.
260
    fed_state: str, mandatory
261
        Name of the state where the wind farms will be allocated
262
263
    """
264
265
    def match_district_se(x):
266
        for sub in hvmv_substation.index:
267
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
268
                return hvmv_substation.at[sub, "point"]
269
270
    con = db.engine()
271
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
272
    # hvmv_substation has the information about HV transmission lines in Germany
273
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
274
275
    # Set wind potential depending on geographical location
276
    power_north = 21.05  # MW/km²
277
    power_south = 16.81  # MW/km²
278
    # Set a maximum installed capacity to limit the power of big potential areas
279
    max_power_hv = 120  # in MW
280
    max_power_mv = 20  # in MW
281
    # Max distance between WF (connected to MV) and nearest HV substation that
282
    # allows its connection to HV.
283
    max_dist_hv = 20000  # in meters
284
285
    summary = pd.DataFrame(
286
        columns=["state", "target", "from existin WF", "MV districts"]
287
    )
288
289
    north = [
290
        "Schleswig-Holstein",
291
        "Mecklenburg-Vorpommern",
292
        "Niedersachsen",
293
        "Bremen",
294
        "Hamburg",
295
    ]
296
297
    if fed_state in north:
298
        state_wf["inst capacity [MW]"] = power_north * state_wf["area [km²]"]
299
    else:
300
        state_wf["inst capacity [MW]"] = power_south * state_wf["area [km²]"]
301
302
    # Divide selected areas based on voltage of connection points
303
    wf_mv = state_wf[
304
        (state_wf["voltage"] != "Hochspannung")
305
        & (state_wf["voltage"] != "Hoechstspannung")
306
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
307
    ]
308
309
    wf_hv = state_wf[
310
        (state_wf["voltage"] == "Hochspannung")
311
        | (state_wf["voltage"] == "Hoechstspannung")
312
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
313
    ]
314
315
    # Wind farms connected to MV network will be connected to HV network if the distance
316
    # to the closest HV substation is =< max_dist_hv, and the installed capacity
317
    # is bigger than max_power_mv
318
    hvmv_substation = hvmv_substation.to_crs(3035)
319
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
320
        lambda x: int(x.split(";")[0])
321
    )
322
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
323
    hv_substations = hv_substations.unary_union  # join all the hv_substations
324
    wf_mv["dist_to_HV"] = (
325
        state_wf["geom"].to_crs(3035).distance(hv_substations)
326
    )
327
    wf_mv_to_hv = wf_mv[
328
        (wf_mv["dist_to_HV"] <= max_dist_hv)
329
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
330
    ]
331
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
332
    wf_mv_to_hv["voltage"] = "Hochspannung"
333
334
    wf_hv = wf_hv.append(wf_mv_to_hv)
335
    wf_mv = wf_mv[
336
        (wf_mv["dist_to_HV"] > max_dist_hv)
337
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
338
    ]
339
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])
340
341
    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
342
        lambda x: x if x < max_power_hv else max_power_hv
343
    )
344
345
    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
346
        lambda x: x if x < max_power_mv else max_power_mv
347
    )
348
349
    wind_farms = wf_hv.append(wf_mv)
350
351
    # Adjust the total installed capacity to the scenario
352
    total_wind_power = (
353
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
354
    )
355
    if total_wind_power > target_power:
356
        scale_factor = target_power / total_wind_power
357
        wf_mv["inst capacity [MW]"] = (
358
            wf_mv["inst capacity [MW]"] * scale_factor
359
        )
360
        wf_hv["inst capacity [MW]"] = (
361
            wf_hv["inst capacity [MW]"] * scale_factor
362
        )
363
        wind_farms = wf_hv.append(wf_mv)
364
        summary = summary.append(
365
            {
366
                "state": fed_state,
367
                "target": target_power,
368
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
369
                "MV districts": 0,
370
            },
371
            ignore_index=True,
372
        )
373
    else:
374
        extra_wf = state_mv_districts.copy()
375
        extra_wf = extra_wf.drop(columns=["centroid"])
376
        # the column centroid has the coordinates of the substation corresponting
377
        # to each mv_grid_district
378
        extra_wf["centroid"] = extra_wf.apply(match_district_se, axis=1)
379
        extra_wf = extra_wf.set_geometry("centroid")
380
        extra_wf["area [km²]"] = 0.0
381
        for district in extra_wf.index:
382
            try:
383
                pot_area_district = gpd.clip(
384
                    state_wf_ni, extra_wf.at[district, "geom"]
385
                )
386
                extra_wf.at[district, "area [km²]"] = pot_area_district[
387
                    "area [km²]"
388
                ].sum()
389
            except:
390
                print(district)
391
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
392
        total_new_area = extra_wf["area [km²]"].sum()
393
        scale_factor = (target_power - total_wind_power) / total_new_area
394
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
395
        extra_wf["voltage"] = "Hochspannung"
396
        summary = summary.append(
397
            {
398
                "state": fed_state,
399
                "target": target_power,
400
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
401
                "MV districts": extra_wf["inst capacity [MW]"].sum(),
402
            },
403
            ignore_index=True,
404
        )
405
        wind_farms = wind_farms.append(extra_wf, ignore_index=True)
406
407
    # Use Definition of thresholds for voltage level assignment
408
    wind_farms["voltage_level"] = 0
409
    for i in wind_farms.index:
410
        try:
411
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
412
                wind_farms.at[i, "voltage_level"] = 5
413
                continue
414
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
415
                wind_farms.at[i, "voltage_level"] = 4
416
                continue
417
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
418
                wind_farms.at[i, "voltage_level"] = 3
419
                continue
420
        except:
421
            print(i)
422
423
    # Look for the maximum id in the table egon_power_plants
424
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
425
    max_id = pd.read_sql(sql, con)
426
    max_id = max_id["max"].iat[0]
427
    if max_id == None:
428
        wind_farm_id = 1
429
    else:
430
        wind_farm_id = int(max_id + 1)
431
432
    # write_table in egon-data database:
433
434
    # Copy relevant columns from wind_farms
435
    insert_wind_farms = wind_farms[
436
        ["inst capacity [MW]", "voltage_level", "centroid"]
437
    ]
438
439
    # Set static column values
440
    insert_wind_farms["carrier"] = source
441
    insert_wind_farms["scenario"] = scenario_year
442
443
    # Change name and crs of geometry column
444
    insert_wind_farms = (
445
        insert_wind_farms.rename(
446
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
447
        )
448
        .set_geometry("geom")
449
        .to_crs(4326)
450
    )
451
452
    # Reset index
453
    insert_wind_farms.index = pd.RangeIndex(
454
        start=wind_farm_id,
455
        stop=wind_farm_id + len(insert_wind_farms),
456
        name="id",
457
    )
458
459
    # Insert into database
460
    insert_wind_farms.reset_index().to_postgis(
461
        "egon_power_plants",
462
        schema="supply",
463
        con=db.engine(),
464
        if_exists="append",
465
    )
466
    return wind_farms, summary
467
468
469
def generate_map():
470
    """Generates a map with the position of all the wind farms
471
472
    Parameters
473
    ----------
474
    *No parameters required
475
476
    """
477
    con = db.engine()
478
479
    # Import wind farms from egon-data
480
    sql = """SELECT  carrier, el_capacity, geom, scenario FROM supply.egon_power_plants
481
    WHERE carrier = 'wind_onshore'"""
482
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
483
        sql, con, geom_col="geom", crs=4326
484
    )
485
    wind_farms_t = wind_farms_t.to_crs(3035)
486
487
    for scenario in wind_farms_t.scenario.unique():
488
        wind_farms = wind_farms_t[wind_farms_t['scenario'] == scenario]
489
        # mv_districts has geographic info of medium voltage districts in Germany
490
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
491
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
492
        mv_districts = mv_districts.to_crs(3035)
493
494
        mv_districts["power"] = 0.0
495
        for std in mv_districts.index:
496
            try:
497
                mv_districts.at[std, "power"] = gpd.clip(
498
                    wind_farms, mv_districts.at[std, "geom"]
499
                ).el_capacity.sum()
500
            except:
501
                print(std)
502
503
        fig, ax = plt.subplots(1, 1)
504
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
505
        mv_districts.plot(
506
            ax=ax,
507
            column="power",
508
            cmap="magma_r",
509
            legend=True,
510
            legend_kwds={
511
                "label": "Installed capacity in MW",
512
                "orientation": "vertical",
513
            },
514
        )
515
        plt.savefig(f'wind_farms_{scenario}.png', dpi=300)
516
    return 0
517