Passed
Pull Request — dev (#1063)
by
unknown
01:34
created

generate_wind_farms()   D

Complexity

Conditions 12

Size

Total Lines 134
Code Lines 72

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 72
dl 0
loc 134
rs 4.2763
c 0
b 0
f 0
cc 12
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex classes like data.datasets.power_plants.wind_farms.generate_wind_farms() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from matplotlib import pyplot as plt
2
from shapely.geometry import MultiPoint, Point
3
import geopandas as gpd
4
import numpy as np
5
import pandas as pd
6
7
from egon.data import db
8
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
9
import egon.data.config
10
11
12
def insert():
13
    """Main function. Import power objectives generate results calling the
14
    functions "generate_wind_farms" and  "wind_power_states".
15
16
    Parameters
17
    ----------
18
    *No parameters required
19
20
    """
21
22
    con = db.engine()
23
24
    # federal_std has the shapes of the German states
25
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
26
    federal_std = gpd.GeoDataFrame.from_postgis(
27
        sql, con, geom_col="geometry", crs=4326
28
    )
29
30
    # target_power_df has the expected capacity of each federal state
31
    sql = (
32
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
33
        "supply.egon_scenario_capacities"
34
    )
35
    target_power_df = pd.read_sql(sql, con)
36
37
    # mv_districts has geographic info of medium voltage districts in Germany
38
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
39
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
40
41
    # Delete all the water bodies from the federal states shapes
42
    federal_std = federal_std[federal_std["gf"] == 4]
43
    federal_std.drop(columns=["gf"], inplace=True)
44
    # Filter the potential expected from wind_onshore
45
    target_power_df = target_power_df[
46
        target_power_df["carrier"] == "wind_onshore"
47
    ]
48
    target_power_df.set_index("nuts", inplace=True)
49
    target_power_df["geom"] = Point(0, 0)
50
51
    # Join the geometries which belong to the same states
52
    for std in target_power_df.index:
53
        df = federal_std[federal_std["nuts"] == std]
54
        if df.size > 0:
55
            target_power_df.at[std, "name"] = df["gen"].iat[0]
56
        else:
57
            target_power_df.at[std, "name"] = np.nan
58
        target_power_df.at[std, "geom"] = df.unary_union
59
    target_power_df = gpd.GeoDataFrame(
60
        target_power_df, geometry="geom", crs=4326
61
    )
62
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
63
    target_power_df = target_power_df.to_crs(3035)
64
65
    # Create the shape for full Germany
66
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
67
    target_power_df.at["DE", "name"] = "Germany"
68
    # Generate WFs for Germany based on potential areas and existing WFs
69
    wf_areas, wf_areas_ni = generate_wind_farms()
70
71
    # Change the columns "geometry" of this GeoDataFrames
72
    wf_areas.set_geometry("centroid", inplace=True)
73
    wf_areas_ni.set_geometry("centroid", inplace=True)
74
75
    # Create centroids of mv_districts to apply the clip function
76
    mv_districts["centroid"] = mv_districts.centroid
77
    mv_districts.set_geometry("centroid", inplace=True)
78
79
    summary_t = pd.DataFrame()
80
    farms = pd.DataFrame()
81
82
    # Fit wind farms scenarions for each one of the states
83
    for scenario in target_power_df.index:
84
        state_wf = gpd.clip(wf_areas, target_power_df.at[scenario, "geom"])
85
        state_wf_ni = gpd.clip(
86
            wf_areas_ni, target_power_df.at[scenario, "geom"]
87
        )
88
        state_mv_districts = gpd.clip(
89
            mv_districts, target_power_df.at[scenario, "geom"]
90
        )
91
        target_power = target_power_df.at[scenario, "capacity"]
92
        scenario_year = target_power_df.at[scenario, "scenario_name"]
93
        source = target_power_df.at[scenario, "carrier"]
94
        fed_state = target_power_df.at[scenario, "name"]
95
        wind_farms_state, summary_state = wind_power_states(
96
            state_wf,
97
            state_wf_ni,
98
            state_mv_districts,
99
            target_power,
100
            scenario_year,
101
            source,
102
            fed_state,
103
        )
104
        summary_t = summary_t.append(summary_state)
105
        farms = farms.append(wind_farms_state)
106
107
    generate_map()
108
109
    return (summary_t, farms)
110
111
112
def generate_wind_farms():
113
    """Generate wind farms based on existing wind farms.
114
115
    Parameters
116
    ----------
117
    *No parameters required
118
119
    """
120
    # get config
121
    cfg = egon.data.config.datasets()["power_plants"]
122
123
    # Due to typos in some inputs, some areas of existing wind farms
124
    # should be discarded using perimeter and area filters
125
    def filter_current_wf(wf_geometry):
126
        if wf_geometry.geom_type == "Point":
127
            return True
128
        if wf_geometry.geom_type == "Polygon":
129
            area = wf_geometry.area
130
            length = wf_geometry.length
131
            # Filter based on the biggest (# of WT) wind farm
132
            return (area < 40000000) & (length < 40000)
133
        if wf_geometry.geom_type == "LineString":
134
            length = wf_geometry.length
135
            return length < 1008  # 8 * rotor diameter (8*126m)
136
137
    # The function 'wind_farm' returns the connection point of a wind turbine
138
    def wind_farm(x):
139
        try:
140
            return map_ap_wea_farm[x]
141
        except:
142
            return np.nan
143
144
    # The function 'voltage' returns the voltage level a wind turbine operates
145
    def voltage(x):
146
        try:
147
            return map_ap_wea_voltage[x]
148
        except:
149
            return np.nan
150
151
    # Connect to the data base
152
    con = db.engine()
153
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
154
    # wf_areas has all the potential areas geometries for wind farms
155
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
156
    # bus has the connection points of the wind farms
157
    bus = pd.read_csv(
158
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
159
        index_col="MaStRNummer",
160
    )
161
    # Drop all the rows without connection point
162
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
163
    # wea has info of each wind turbine in Germany.
164
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
165
166
    # Delete all the rows without information about geographical location
167
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
168
    # Delete all the offshore wind turbines
169
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
170
    # the variable map_ap_wea_farm have the connection point of all the
171
    # available wt in the dataframe bus.
172
    map_ap_wea_farm = {}
173
    map_ap_wea_voltage = {}
174
175
    wea["connection point"] = wea["LokationMastrNummer"].map(
176
        bus["NetzanschlusspunktMastrNummer"]
177
    )
178
    wea["voltage"] = wea["LokationMastrNummer"].map(
179
        bus["NetzanschlusspunktMastrNummer"]
180
    )
181
182
    # Create the columns 'geometry' which will have location of each WT in a
183
    # point type
184
    wea = gpd.GeoDataFrame(
185
        wea,
186
        geometry=gpd.points_from_xy(
187
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
188
        ),
189
    )
190
191
    # wf_size storages the number of WT connected to each connection point
192
    wf_size = wea["connection point"].value_counts()
193
    # Delete all the connection points with less than 3 WT
194
    wf_size = wf_size[wf_size >= 3]
195
    # Filter all the WT which are not part of a wind farm of at least 3 WT
196
    wea = wea[wea["connection point"].isin(wf_size.index)]
197
    # current_wfs has all the geometries that represent the existing wind
198
    # farms
199
    current_wfs = gpd.GeoDataFrame(
200
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
201
    )
202
    for conn_point, wt_location in wea.groupby("connection point"):
203
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
204
            wt_location["geometry"].values
205
        ).convex_hull
206
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
207
208
    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
209
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
210
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
211
    # The 'filter_wts' is used to discard atypical values for the current wind
212
    # farms
213
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
214
        lambda x: filter_current_wf(x)
215
    )
216
217
    # Apply the filter based on area and perimeter
218
    current_wfs = current_wfs[current_wfs["filter2"]]
219
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
220
221
    wf_areas["area [km²]"] = wf_areas.area / 1000000
222
223
    # Exclude areas smaller than X m². X was calculated as the area of
224
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
225
    min_area = 4 * (0.126**2) * np.sqrt(3)
226
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
227
228
    # Find the centroid of all the suitable potential areas
229
    wf_areas["centroid"] = wf_areas.centroid
230
231
    # find the potential areas that intersects the convex hulls of current
232
    # wind farms and assign voltage levels
233
    wf_areas = wf_areas.to_crs(4326)
234
    for i in wf_areas.index:
235
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
236
        if intersection.any() == False:
237
            wf_areas.at[i, "voltage"] = "No Intersection"
238
        else:
239
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
240
241
    # wf_areas_ni has the potential areas which don't intersect any current
242
    # wind farm
243
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
244
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
245
    return wf_areas, wf_areas_ni
246
247
248
def wind_power_states(
249
    state_wf,
250
    state_wf_ni,
251
    state_mv_districts,
252
    target_power,
253
    scenario_year,
254
    source,
255
    fed_state,
256
):
257
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
258
    database.
259
260
    Parameters
261
    ----------
262
    state_wf: geodataframe, mandatory
263
        gdf containing all the wf in the state created based on existing wf.
264
    state_wf_ni: geodataframe, mandatory
265
        potential areas in the the state wich don't intersect any existing wf
266
    state_mv_districts: geodataframe, mandatory
267
        gdf containing all the MV/HV substations in the state
268
    target_power: int, mandatory
269
        Objective power for a state given in MW
270
    scenario_year: str, mandatory
271
        name of the scenario
272
    source: str, mandatory
273
        Type of energy genetor. Always "Wind_onshore" for this script.
274
    fed_state: str, mandatory
275
        Name of the state where the wind farms will be allocated
276
277
    """
278
279
    def match_district_se(x):
280
        for sub in hvmv_substation.index:
281
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
282
                return hvmv_substation.at[sub, "point"]
283
284
    con = db.engine()
285
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
286
    # hvmv_substation has the information about HV transmission lines in
287
    # Germany
288
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
289
290
    # Set wind potential depending on geographical location
291
    power_north = 21.05  # MW/km²
292
    power_south = 16.81  # MW/km²
293
    # Set a maximum installed capacity to limit the power of big potential
294
    # areas
295
    max_power_hv = 120  # in MW
296
    max_power_mv = 20  # in MW
297
    # Max distance between WF (connected to MV) and nearest HV substation
298
    # that allows its connection to HV.
299
    max_dist_hv = 20000  # in meters
300
301
    summary = pd.DataFrame(
302
        columns=["state", "target", "from existin WF", "MV districts"]
303
    )
304
305
    north = [
306
        "Schleswig-Holstein",
307
        "Mecklenburg-Vorpommern",
308
        "Niedersachsen",
309
        "Bremen",
310
        "Hamburg",
311
    ]
312
313
    if fed_state in north:
314
        state_wf["inst capacity [MW]"] = power_north * state_wf["area [km²]"]
315
    else:
316
        state_wf["inst capacity [MW]"] = power_south * state_wf["area [km²]"]
317
318
    # Divide selected areas based on voltage of connection points
319
    wf_mv = state_wf[
320
        (state_wf["voltage"] != "Hochspannung")
321
        & (state_wf["voltage"] != "Hoechstspannung")
322
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
323
    ]
324
325
    wf_hv = state_wf[
326
        (state_wf["voltage"] == "Hochspannung")
327
        | (state_wf["voltage"] == "Hoechstspannung")
328
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
329
    ]
330
331
    # Wind farms connected to MV network will be connected to HV network if
332
    # the distance to the closest HV substation is =< max_dist_hv, and the
333
    # installed capacity is bigger than max_power_mv
334
    hvmv_substation = hvmv_substation.to_crs(3035)
335
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
336
        lambda x: int(x.split(";")[0])
337
    )
338
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
339
    hv_substations = hv_substations.unary_union  # join all the hv_substations
340
    wf_mv["dist_to_HV"] = (
341
        state_wf["geom"].to_crs(3035).distance(hv_substations)
342
    )
343
    wf_mv_to_hv = wf_mv[
344
        (wf_mv["dist_to_HV"] <= max_dist_hv)
345
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
346
    ]
347
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
348
    wf_mv_to_hv["voltage"] = "Hochspannung"
349
350
    wf_hv = wf_hv.append(wf_mv_to_hv)
351
    wf_mv = wf_mv[
352
        (wf_mv["dist_to_HV"] > max_dist_hv)
353
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
354
    ]
355
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])
356
357
    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
358
        lambda x: x if x < max_power_hv else max_power_hv
359
    )
360
361
    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
362
        lambda x: x if x < max_power_mv else max_power_mv
363
    )
364
365
    wind_farms = wf_hv.append(wf_mv)
366
367
    # Adjust the total installed capacity to the scenario
368
    total_wind_power = (
369
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
370
    )
371
    if total_wind_power > target_power:
372
        scale_factor = target_power / total_wind_power
373
        wf_mv["inst capacity [MW]"] = (
374
            wf_mv["inst capacity [MW]"] * scale_factor
375
        )
376
        wf_hv["inst capacity [MW]"] = (
377
            wf_hv["inst capacity [MW]"] * scale_factor
378
        )
379
        wind_farms = wf_hv.append(wf_mv)
380
        summary = summary.append(
381
            {
382
                "state": fed_state,
383
                "target": target_power,
384
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
385
                "MV districts": 0,
386
            },
387
            ignore_index=True,
388
        )
389
    else:
390
        extra_wf = state_mv_districts.copy()
391
        extra_wf = extra_wf.drop(columns=["centroid"])
392
        # the column centroid has the coordinates of the substation
393
        # corresponding to each mv_grid_district
394
        extra_wf["centroid"] = extra_wf.apply(match_district_se, axis=1)
395
        extra_wf = extra_wf.set_geometry("centroid")
396
        extra_wf["area [km²]"] = 0.0
397
        for district in extra_wf.index:
398
            try:
399
                pot_area_district = gpd.clip(
400
                    state_wf_ni, extra_wf.at[district, "geom"]
401
                )
402
                extra_wf.at[district, "area [km²]"] = pot_area_district[
403
                    "area [km²]"
404
                ].sum()
405
            except:
406
                print(district)
407
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
408
        total_new_area = extra_wf["area [km²]"].sum()
409
        scale_factor = (target_power - total_wind_power) / total_new_area
410
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
411
        extra_wf["voltage"] = "Hochspannung"
412
        summary = summary.append(
413
            {
414
                "state": fed_state,
415
                "target": target_power,
416
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
417
                "MV districts": extra_wf["inst capacity [MW]"].sum(),
418
            },
419
            ignore_index=True,
420
        )
421
        wind_farms = wind_farms.append(extra_wf, ignore_index=True)
422
423
    # Use Definition of thresholds for voltage level assignment
424
    wind_farms["voltage_level"] = 0
425
    for i in wind_farms.index:
426
        try:
427
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
428
                wind_farms.at[i, "voltage_level"] = 5
429
                continue
430
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
431
                wind_farms.at[i, "voltage_level"] = 4
432
                continue
433
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
434
                wind_farms.at[i, "voltage_level"] = 3
435
                continue
436
        except:
437
            print(i)
438
439
    # Look for the maximum id in the table egon_power_plants
440
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
441
    max_id = pd.read_sql(sql, con)
442
    max_id = max_id["max"].iat[0]
443
    if max_id is None:
444
        wind_farm_id = 1
445
    else:
446
        wind_farm_id = int(max_id + 1)
447
448
    # write_table in egon-data database:
449
450
    # Copy relevant columns from wind_farms
451
    insert_wind_farms = wind_farms[
452
        ["inst capacity [MW]", "voltage_level", "centroid"]
453
    ]
454
455
    # Set static column values
456
    insert_wind_farms["carrier"] = source
457
    insert_wind_farms["scenario"] = scenario_year
458
459
    # Change name and crs of geometry column
460
    insert_wind_farms = (
461
        insert_wind_farms.rename(
462
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
463
        )
464
        .set_geometry("geom")
465
        .to_crs(4326)
466
    )
467
468
    # Reset index
469
    insert_wind_farms.index = pd.RangeIndex(
470
        start=wind_farm_id,
471
        stop=wind_farm_id + len(insert_wind_farms),
472
        name="id",
473
    )
474
475
    # Insert into database
476
    insert_wind_farms.reset_index().to_postgis(
477
        "egon_power_plants",
478
        schema="supply",
479
        con=db.engine(),
480
        if_exists="append",
481
    )
482
    return wind_farms, summary
483
484
485
def generate_map():
486
    """Generates a map with the position of all the wind farms
487
488
    Parameters
489
    ----------
490
    *No parameters required
491
492
    """
493
    con = db.engine()
494
495
    # Import wind farms from egon-data
496
    sql = (
497
        "SELECT  carrier, el_capacity, geom, scenario FROM "
498
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
499
    )
500
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
501
        sql, con, geom_col="geom", crs=4326
502
    )
503
    wind_farms_t = wind_farms_t.to_crs(3035)
504
505
    for scenario in wind_farms_t.scenario.unique():
506
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
507
        # mv_districts has geographic info of medium voltage districts in
508
        # Germany
509
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
510
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
511
        mv_districts = mv_districts.to_crs(3035)
512
513
        mv_districts["power"] = 0.0
514
        for std in mv_districts.index:
515
            try:
516
                mv_districts.at[std, "power"] = gpd.clip(
517
                    wind_farms, mv_districts.at[std, "geom"]
518
                ).el_capacity.sum()
519
            except:
520
                print(std)
521
522
        fig, ax = plt.subplots(1, 1)
523
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
524
        mv_districts.plot(
525
            ax=ax,
526
            column="power",
527
            cmap="magma_r",
528
            legend=True,
529
            legend_kwds={
530
                "label": "Installed capacity in MW",
531
                "orientation": "vertical",
532
            },
533
        )
534
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
535
    return 0
536