data.datasets.power_plants.wind_farms - Code Metrics - Inspection of "Merge pull request #1273 from openego/fixes/#1256-..." - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — dev ( 8582b4...82307e )

unknown

created 2025-06-12 13:09 UTC

data.datasets.power_plants.wind_farms B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	595
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	43
eloc	354
dl	0
loc	595
rs	8.96
c	0
b	0
f	0

4 Functions

Rating	Name	Size	Complexity
B	insert()	116	6
F	wind_power_states()	278	21
A	generate_map()	51	4
D	generate_wind_farms()	132	12

How to fix Complexity

from matplotlib import pyplot as plt
from shapely.geometry import MultiPoint, Point
import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import db
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
import egon.data.config


def insert():
    """Main function. Import power objectives generate results calling the
    functions "generate_wind_farms" and  "wind_power_states".

    Parameters
    ----------
    *No parameters required

    """

    con = db.engine()

    # federal_std has the shapes of the German states
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
    federal_std = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geometry", crs=4326
    )

    # target_power_df has the expected capacity of each federal state
    sql = (
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
        "supply.egon_scenario_capacities"
    )
    target_power_df = pd.read_sql(sql, con)

    # mv_districts has geographic info of medium voltage districts in Germany
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)

    # Delete all the water bodies from the federal states shapes
    federal_std = federal_std[federal_std["gf"] == 4]
    federal_std.drop(columns=["gf"], inplace=True)
    # Filter the potential expected from wind_onshore
    target_power_df = target_power_df[
        target_power_df["carrier"] == "wind_onshore"
    ]
    target_power_df = target_power_df[
        target_power_df["scenario_name"].isin(["eGon2035", "eGon100RE"])
    ]
    target_power_df.set_index("nuts", inplace=True)
    target_power_df["geom"] = Point(0, 0)

    # Join the geometries which belong to the same states
    for std in target_power_df.index:
        df = federal_std[federal_std["nuts"] == std]
        if df.size > 0:
            target_power_df.at[std, "name"] = df["gen"].iat[0]
        else:
            target_power_df.at[std, "name"] = np.nan
        target_power_df.at[std, "geom"] = df.unary_union
    target_power_df = gpd.GeoDataFrame(
        target_power_df, geometry="geom", crs=4326
    )
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
    target_power_df = target_power_df.to_crs(3035)

    # Create the shape for full Germany
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
    target_power_df.at["DE", "name"] = "Germany"
    # Generate WFs for Germany based on potential areas and existing WFs
    wf_areas, wf_areas_ni = generate_wind_farms()

    # Change the columns "geometry" of this GeoDataFrames
    wf_areas.set_geometry("centroid", inplace=True)
    wf_areas_ni.set_geometry("centroid", inplace=True)

    # Create centroids of mv_districts to apply the clip function
    mv_districts["centroid"] = mv_districts.centroid
    mv_districts.set_geometry("centroid", inplace=True)

    summary_t = pd.DataFrame()
    farms = pd.DataFrame()

    if "eGon100RE" in target_power_df["scenario_name"].values:
        wind_farms_state, summary_state = wind_power_states(
            wf_areas,
            wf_areas_ni,
            mv_districts,
            target_power_df.at["DE", "capacity"],
            "eGon100RE",
            "wind_onshore",
            "DE",
        )
        target_power_df = target_power_df[
            target_power_df["scenario_name"] != "eGon100RE"
        ]

    if "eGon2035" in target_power_df["scenario_name"].values:
        # Fit wind farms scenarions for each one of the states
        for bundesland in target_power_df.index:
            state_wf = gpd.clip(wf_areas, target_power_df.at[bundesland, "geom"])
            state_wf_ni = gpd.clip(
                wf_areas_ni, target_power_df.at[bundesland, "geom"]
            )
            state_mv_districts = gpd.clip(
                mv_districts, target_power_df.at[bundesland, "geom"]
            )
            target_power = target_power_df.at[bundesland, "capacity"]
            scenario_year = target_power_df.at[bundesland, "scenario_name"]
            source = target_power_df.at[bundesland, "carrier"]
            fed_state = target_power_df.at[bundesland, "name"]
            wind_farms_state, summary_state = wind_power_states(
                state_wf,
                state_wf_ni,
                state_mv_districts,
                target_power,
                scenario_year,
                source,
                fed_state,
            )
            summary_t = pd.concat([summary_t, summary_state])
            farms = pd.concat([farms, wind_farms_state])

    generate_map()

    return


def generate_wind_farms():
    """Generate wind farms based on existing wind farms.

    Parameters
    ----------
    *No parameters required

    """
    # get config
    cfg = egon.data.config.datasets()["power_plants"]

    # Due to typos in some inputs, some areas of existing wind farms
    # should be discarded using perimeter and area filters
    def filter_current_wf(wf_geometry):
        if wf_geometry.geom_type == "Point":
            return True
        if wf_geometry.geom_type == "Polygon":
            area = wf_geometry.area
            length = wf_geometry.length
            # Filter based on the biggest (# of WT) wind farm
            return (area < 40000000) & (length < 40000)
        if wf_geometry.geom_type == "LineString":
            length = wf_geometry.length
            return length < 1008  # 8 * rotor diameter (8*126m)

    # The function 'wind_farm' returns the connection point of a wind turbine
    def wind_farm(x):
        try:
            return map_ap_wea_farm[x]
        except:
            return np.nan

    # The function 'voltage' returns the voltage level a wind turbine operates
    def voltage(x):
        try:
            return map_ap_wea_voltage[x]
        except:
            return np.nan

    # Connect to the data base
    con = db.engine()
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
    # wf_areas has all the potential areas geometries for wind farms
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
    # bus has the connection points of the wind farms
    bus = pd.read_csv(
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
        index_col="MaStRNummer",
    )
    # Drop all the rows without connection point
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
    # wea has info of each wind turbine in Germany.
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])

    # Delete all the rows without information about geographical location
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
    # Delete all the offshore wind turbines
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
    # the variable map_ap_wea_farm have the connection point of all the
    # available wt in the dataframe bus.
    map_ap_wea_farm = {}
    map_ap_wea_voltage = {}

    wea["connection point"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"]
    )
    wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])

    # Create the columns 'geometry' which will have location of each WT in a
    # point type
    wea = gpd.GeoDataFrame(
        wea,
        geometry=gpd.points_from_xy(
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
        ),
    )

    # wf_size storages the number of WT connected to each connection point
    wf_size = wea["connection point"].value_counts()
    # Delete all the connection points with less than 3 WT
    wf_size = wf_size[wf_size >= 3]
    # Filter all the WT which are not part of a wind farm of at least 3 WT
    wea = wea[wea["connection point"].isin(wf_size.index)]
    # current_wfs has all the geometries that represent the existing wind
    # farms
    current_wfs = gpd.GeoDataFrame(
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
    )
    for conn_point, wt_location in wea.groupby("connection point"):
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
            wt_location["geometry"].values
        ).convex_hull
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]

    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
    # The 'filter_wts' is used to discard atypical values for the current wind
    # farms
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
        lambda x: filter_current_wf(x)
    )

    # Apply the filter based on area and perimeter
    current_wfs = current_wfs[current_wfs["filter2"]]
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])

    wf_areas["area [km²]"] = wf_areas.area / 1000000

    # Exclude areas smaller than X m². X was calculated as the area of
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
    min_area = 4 * (0.126**2) * np.sqrt(3)
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]

    # Find the centroid of all the suitable potential areas
    wf_areas["centroid"] = wf_areas.centroid

    # find the potential areas that intersects the convex hulls of current
    # wind farms and assign voltage levels
    wf_areas = wf_areas.to_crs(4326)
    for i in wf_areas.index:
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
        if intersection.any() == False:
            wf_areas.at[i, "voltage"] = "No Intersection"
        else:
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]

    # wf_areas_ni has the potential areas which don't intersect any current
    # wind farm
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
    return wf_areas, wf_areas_ni


def wind_power_states(
    state_wf,
    state_wf_ni,
    state_mv_districts,
    target_power,
    scenario_year,
    source,
    fed_state,
):
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
    database.

    Parameters
    ----------
    state_wf: geodataframe, mandatory
        gdf containing all the wf in the state created based on existing wf.
    state_wf_ni: geodataframe, mandatory
        potential areas in the the state wich don't intersect any existing wf
    state_mv_districts: geodataframe, mandatory
        gdf containing all the MV/HV substations in the state
    target_power: int, mandatory
        Objective power for a state given in MW
    scenario_year: str, mandatory
        name of the scenario
    source: str, mandatory
        Type of energy genetor. Always "Wind_onshore" for this script.
    fed_state: str, mandatory
        Name of the state where the wind farms will be allocated

    """

    def match_district_se(x):
        for sub in hvmv_substation.index:
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
                return hvmv_substation.at[sub, "point"]

    con = db.engine()
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
    # hvmv_substation has the information about HV transmission lines in
    # Germany
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")

    # Set wind potential depending on geographical location
    power_north = 21.05  # MW/km²
    power_south = 16.81  # MW/km²
    # Set a maximum installed capacity to limit the power of big potential
    # areas
    max_power_hv = 120  # in MW
    max_power_mv = 20  # in MW
    # Max distance between WF (connected to MV) and nearest HV substation
    # that allows its connection to HV.
    max_dist_hv = 20000  # in meters

    summary = pd.DataFrame(
        columns=["state", "target", "from existin WF", "MV districts"]
    )

    north = [
        "Schleswig-Holstein",
        "Mecklenburg-Vorpommern",
        "Niedersachsen",
        "Bremen",
        "Hamburg",
    ]

    if fed_state == "DE":
        sql = f"""SELECT * FROM boundaries.vg250_lan
        WHERE gen in {tuple(north)}
        """
        north_states = gpd.GeoDataFrame.from_postgis(
            sql, con, geom_col="geometry"
        )
        north_states.to_crs(3035, inplace=True)
        state_wf["nord"] = state_wf.within(north_states.unary_union)
        state_wf["inst capacity [MW]"] = state_wf.apply(
            lambda x: (
                power_north * x["area [km²]"]
                if x["nord"]
                else power_south * x["area [km²]"]),
            axis=1,
        )
    else:
        if fed_state in north:
            state_wf["inst capacity [MW]"] = (
                power_north * state_wf["area [km²]"]
            )
        else:
            state_wf["inst capacity [MW]"] = (
                power_south * state_wf["area [km²]"]
            )

    # Divide selected areas based on voltage of connection points
    wf_mv = state_wf[
        (state_wf["voltage"] != "Hochspannung")
        & (state_wf["voltage"] != "Hoechstspannung")
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
    ]

    wf_hv = state_wf[
        (state_wf["voltage"] == "Hochspannung")
        | (state_wf["voltage"] == "Hoechstspannung")
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
    ]

    # Wind farms connected to MV network will be connected to HV network if
    # the distance to the closest HV substation is =< max_dist_hv, and the
    # installed capacity is bigger than max_power_mv
    hvmv_substation = hvmv_substation.to_crs(3035)
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
        lambda x: int(x.split(";")[0])
    )
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
    hv_substations = hv_substations.unary_union  # join all the hv_substations
    wf_mv["dist_to_HV"] = (
        state_wf["geom"].to_crs(3035).distance(hv_substations)
    )
    wf_mv_to_hv = wf_mv[
        (wf_mv["dist_to_HV"] <= max_dist_hv)
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
    ]
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
    wf_mv_to_hv["voltage"] = "Hochspannung"

    wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
    wf_mv = wf_mv[
        (wf_mv["dist_to_HV"] > max_dist_hv)
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
    ]
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])

    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_hv else max_power_hv
    )

    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_mv else max_power_mv
    )

    wind_farms = pd.concat([wf_hv, wf_mv])

    # Adjust the total installed capacity to the scenario
    total_wind_power = (
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
    )

    if total_wind_power > target_power:
        scale_factor = target_power / total_wind_power
        wf_mv["inst capacity [MW]"] = (
            wf_mv["inst capacity [MW]"] * scale_factor
        )
        wf_hv["inst capacity [MW]"] = (
            wf_hv["inst capacity [MW]"] * scale_factor
        )
        wind_farms = pd.concat([wf_hv, wf_mv])
        summary = pd.concat(
            [
                summary,
                pd.DataFrame(
                    index=[summary.index.max() + 1],
                    data={
                        "state": fed_state,
                        "target": target_power,
                        "from existin WF": wind_farms[
                            "inst capacity [MW]"
                        ].sum(),
                        "MV districts": 0,
                    },
                ),
            ],
            ignore_index=True,
        )
    else:
        extra_wf = state_mv_districts.copy()
        extra_wf = extra_wf.set_geometry("geom")
        extra_wf["area [km²]"] = 0.0
        for district in extra_wf.index:
            try:
                pot_area_district = gpd.clip(
                    state_wf_ni, extra_wf.at[district, "geom"]
                )
                extra_wf.at[district, "area [km²]"] = pot_area_district[
                    "area [km²]"
                ].sum()
            except:
                print(district)
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
        total_new_area = extra_wf["area [km²]"].sum()
        scale_factor = (target_power - total_wind_power) / total_new_area
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
        extra_wf["voltage"] = "Hochspannung"
        summary = pd.concat(
            [
                summary,
                pd.DataFrame(
                    index=[summary.index.max() + 1],
                    data={
                        "state": fed_state,
                        "target": target_power,
                        "from existin WF": wind_farms[
                            "inst capacity [MW]"
                        ].sum(),
                        "MV districts": extra_wf["inst capacity [MW]"].sum(),
                    },
                ),
            ],
            ignore_index=True,
        )
        extra_wf.to_crs(4326, inplace=True)
        wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)

    # Use Definition of thresholds for voltage level assignment
    wind_farms["voltage_level"] = 0
    for i in wind_farms.index:
        try:
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
                wind_farms.at[i, "voltage_level"] = 5
                continue
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
                wind_farms.at[i, "voltage_level"] = 4
                continue
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
                wind_farms.at[i, "voltage_level"] = 3
                continue
        except:
            print(i)

    # Look for the maximum id in the table egon_power_plants
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
    max_id = pd.read_sql(sql, con)
    max_id = max_id["max"].iat[0]
    if max_id is None:
        wind_farm_id = 1
    else:
        wind_farm_id = int(max_id + 1)

    # write_table in egon-data database:

    # Copy relevant columns from wind_farms
    insert_wind_farms = wind_farms[
        ["inst capacity [MW]", "voltage_level", "centroid"]
    ]

    # Set static column values
    insert_wind_farms["carrier"] = source
    insert_wind_farms["scenario"] = scenario_year

    # Change name and crs of geometry column
    insert_wind_farms = (
        insert_wind_farms.rename(
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
        )
        .set_geometry("geom")
        .to_crs(4326)
    )

    # Reset index
    insert_wind_farms.index = pd.RangeIndex(
        start=wind_farm_id,
        stop=wind_farm_id + len(insert_wind_farms),
        name="id",
    )

    # Delete old wind_onshore generators
    db.execute_sql(
        f"""DELETE FROM supply.egon_power_plants
        WHERE carrier = 'wind_onshore'
        AND scenario = '{scenario_year}'
        """
    )

    # Insert into database
    insert_wind_farms.reset_index().to_postgis(
        "egon_power_plants",
        schema="supply",
        con=db.engine(),
        if_exists="append",
    )
    return wind_farms, summary


def generate_map():
    """Generates a map with the position of all the wind farms

    Parameters
    ----------
    *No parameters required

    """
    con = db.engine()

    # Import wind farms from egon-data
    sql = (
        "SELECT  carrier, el_capacity, geom, scenario FROM "
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
    )
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geom", crs=4326
    )
    wind_farms_t = wind_farms_t.to_crs(3035)

    for scenario in wind_farms_t.scenario.unique():
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
        # mv_districts has geographic info of medium voltage districts in
        # Germany
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
        mv_districts = mv_districts.to_crs(3035)

        mv_districts["power"] = 0.0
        for std in mv_districts.index:
            try:
                mv_districts.at[std, "power"] = gpd.clip(
                    wind_farms, mv_districts.at[std, "geom"]
                ).el_capacity.sum()
            except:
                print(std)

        fig, ax = plt.subplots(1, 1)
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
        mv_districts.plot(
            ax=ax,
            column="power",
            cmap="magma_r",
            legend=True,
            legend_kwds={
                "label": "Installed capacity in MW",
                "orientation": "vertical",
            },
        )
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
    return 0


1			from matplotlib import pyplot as plt
2			from shapely.geometry import MultiPoint, Point
3			import geopandas as gpd
4			import numpy as np
5			import pandas as pd
6
7			from egon.data import db
8			from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
9			import egon.data.config
10
11
12			def insert():
13			"""Main function. Import power objectives generate results calling the
14			functions "generate_wind_farms" and "wind_power_states".
15
16			Parameters
17			----------
18			*No parameters required
19
20			"""
21
22			con = db.engine()
23
24			# federal_std has the shapes of the German states
25			sql = "SELECT gen, gf, nuts, geometry FROM boundaries.vg250_lan"
26			federal_std = gpd.GeoDataFrame.from_postgis(
27			sql, con, geom_col="geometry", crs=4326
28			)
29
30			# target_power_df has the expected capacity of each federal state
31			sql = (
32			"SELECT carrier, capacity, nuts, scenario_name FROM "
33			"supply.egon_scenario_capacities"
34			)
35			target_power_df = pd.read_sql(sql, con)
36
37			# mv_districts has geographic info of medium voltage districts in Germany
38			sql = "SELECT geom FROM grid.egon_mv_grid_district"
39			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
40
41			# Delete all the water bodies from the federal states shapes
42			federal_std = federal_std[federal_std["gf"] == 4]
43			federal_std.drop(columns=["gf"], inplace=True)
44			# Filter the potential expected from wind_onshore
45			target_power_df = target_power_df[
46			target_power_df["carrier"] == "wind_onshore"
47			]
48			target_power_df = target_power_df[
49			target_power_df["scenario_name"].isin(["eGon2035", "eGon100RE"])
50			]
51			target_power_df.set_index("nuts", inplace=True)
52			target_power_df["geom"] = Point(0, 0)
53
54			# Join the geometries which belong to the same states
55			for std in target_power_df.index:
56			df = federal_std[federal_std["nuts"] == std]
57			if df.size > 0:
58			target_power_df.at[std, "name"] = df["gen"].iat[0]
59			else:
60			target_power_df.at[std, "name"] = np.nan
61			target_power_df.at[std, "geom"] = df.unary_union
62			target_power_df = gpd.GeoDataFrame(
63			target_power_df, geometry="geom", crs=4326
64			)
65			target_power_df = target_power_df[target_power_df["capacity"] > 0]
66			target_power_df = target_power_df.to_crs(3035)
67
68			# Create the shape for full Germany
69			target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
70			target_power_df.at["DE", "name"] = "Germany"
71			# Generate WFs for Germany based on potential areas and existing WFs
72			wf_areas, wf_areas_ni = generate_wind_farms()
73
74			# Change the columns "geometry" of this GeoDataFrames
75			wf_areas.set_geometry("centroid", inplace=True)
76			wf_areas_ni.set_geometry("centroid", inplace=True)
77
78			# Create centroids of mv_districts to apply the clip function
79			mv_districts["centroid"] = mv_districts.centroid
80			mv_districts.set_geometry("centroid", inplace=True)
81
82			summary_t = pd.DataFrame()
83			farms = pd.DataFrame()
84
85			if "eGon100RE" in target_power_df["scenario_name"].values:
86			wind_farms_state, summary_state = wind_power_states(
87			wf_areas,
88			wf_areas_ni,
89			mv_districts,
90			target_power_df.at["DE", "capacity"],
91			"eGon100RE",
92			"wind_onshore",
93			"DE",
94			)
95			target_power_df = target_power_df[
96			target_power_df["scenario_name"] != "eGon100RE"
97			]
98
99			if "eGon2035" in target_power_df["scenario_name"].values:
100			# Fit wind farms scenarions for each one of the states
101			for bundesland in target_power_df.index:
102			state_wf = gpd.clip(wf_areas, target_power_df.at[bundesland, "geom"])
103			state_wf_ni = gpd.clip(
104			wf_areas_ni, target_power_df.at[bundesland, "geom"]
105			)
106			state_mv_districts = gpd.clip(
107			mv_districts, target_power_df.at[bundesland, "geom"]
108			)
109			target_power = target_power_df.at[bundesland, "capacity"]
110			scenario_year = target_power_df.at[bundesland, "scenario_name"]
111			source = target_power_df.at[bundesland, "carrier"]
112			fed_state = target_power_df.at[bundesland, "name"]
113			wind_farms_state, summary_state = wind_power_states(
114			state_wf,
115			state_wf_ni,
116			state_mv_districts,
117			target_power,
118			scenario_year,
119			source,
120			fed_state,
121			)
122			summary_t = pd.concat([summary_t, summary_state])
123			farms = pd.concat([farms, wind_farms_state])
124
125			generate_map()
126
127			return
128
129
130			def generate_wind_farms():
131			"""Generate wind farms based on existing wind farms.
132
133			Parameters
134			----------
135			*No parameters required
136
137			"""
138			# get config
139			cfg = egon.data.config.datasets()["power_plants"]
140
141			# Due to typos in some inputs, some areas of existing wind farms
142			# should be discarded using perimeter and area filters
143			def filter_current_wf(wf_geometry):
144			if wf_geometry.geom_type == "Point":
145			return True
146			if wf_geometry.geom_type == "Polygon":
147			area = wf_geometry.area
148			length = wf_geometry.length
149			# Filter based on the biggest (# of WT) wind farm
150			return (area < 40000000) & (length < 40000)
151			if wf_geometry.geom_type == "LineString":
152			length = wf_geometry.length
153			return length < 1008 # 8 * rotor diameter (8*126m)
154
155			# The function 'wind_farm' returns the connection point of a wind turbine
156			def wind_farm(x):
157			try:
158			return map_ap_wea_farm[x]
159			except:
160			return np.nan
161
162			# The function 'voltage' returns the voltage level a wind turbine operates
163			def voltage(x):
164			try:
165			return map_ap_wea_voltage[x]
166			except:
167			return np.nan
168
169			# Connect to the data base
170			con = db.engine()
171			sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
172			# wf_areas has all the potential areas geometries for wind farms
173			wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
174			# bus has the connection points of the wind farms
175			bus = pd.read_csv(
176			WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
177			index_col="MaStRNummer",
178			)
179			# Drop all the rows without connection point
180			bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
181			# wea has info of each wind turbine in Germany.
182			wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
183
184			# Delete all the rows without information about geographical location
185			wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
186			# Delete all the offshore wind turbines
187			wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
188			# the variable map_ap_wea_farm have the connection point of all the
189			# available wt in the dataframe bus.
190			map_ap_wea_farm = {}
191			map_ap_wea_voltage = {}
192
193			wea["connection point"] = wea["LokationMastrNummer"].map(
194			bus["NetzanschlusspunktMastrNummer"]
195			)
196			wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])
197
198			# Create the columns 'geometry' which will have location of each WT in a
199			# point type
200			wea = gpd.GeoDataFrame(
201			wea,
202			geometry=gpd.points_from_xy(
203			wea["Laengengrad"], wea["Breitengrad"], crs=4326
204			),
205			)
206
207			# wf_size storages the number of WT connected to each connection point
208			wf_size = wea["connection point"].value_counts()
209			# Delete all the connection points with less than 3 WT
210			wf_size = wf_size[wf_size >= 3]
211			# Filter all the WT which are not part of a wind farm of at least 3 WT
212			wea = wea[wea["connection point"].isin(wf_size.index)]
213			# current_wfs has all the geometries that represent the existing wind
214			# farms
215			current_wfs = gpd.GeoDataFrame(
216			index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
217			)
218			for conn_point, wt_location in wea.groupby("connection point"):
219			current_wfs.at[conn_point, "geometry"] = MultiPoint(
220			wt_location["geometry"].values
221			).convex_hull
222			current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
223
224			current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
225			current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
226			current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
227			# The 'filter_wts' is used to discard atypical values for the current wind
228			# farms
229			current_wfs["filter2"] = current_wfs["geometry2"].apply(
230			lambda x: filter_current_wf(x)
231			)
232
233			# Apply the filter based on area and perimeter
234			current_wfs = current_wfs[current_wfs["filter2"]]
235			current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
236
237			wf_areas["area [km²]"] = wf_areas.area / 1000000
238
239			# Exclude areas smaller than X m². X was calculated as the area of
240			# 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
241			min_area = 4 * (0.126*2) np.sqrt(3)
242			wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
243
244			# Find the centroid of all the suitable potential areas
245			wf_areas["centroid"] = wf_areas.centroid
246
247			# find the potential areas that intersects the convex hulls of current
248			# wind farms and assign voltage levels
249			wf_areas = wf_areas.to_crs(4326)
250			for i in wf_areas.index:
251			intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
252			if intersection.any() == False:
253			wf_areas.at[i, "voltage"] = "No Intersection"
254			else:
255			wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
256
257			# wf_areas_ni has the potential areas which don't intersect any current
258			# wind farm
259			wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
260			wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
261			return wf_areas, wf_areas_ni
262
263
264			def wind_power_states(
265			state_wf,
266			state_wf_ni,
267			state_mv_districts,
268			target_power,
269			scenario_year,
270			source,
271			fed_state,
272			):
273			"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
274			database.
275
276			Parameters
277			----------
278			state_wf: geodataframe, mandatory
279			gdf containing all the wf in the state created based on existing wf.
280			state_wf_ni: geodataframe, mandatory
281			potential areas in the the state wich don't intersect any existing wf
282			state_mv_districts: geodataframe, mandatory
283			gdf containing all the MV/HV substations in the state
284			target_power: int, mandatory
285			Objective power for a state given in MW
286			scenario_year: str, mandatory
287			name of the scenario
288			source: str, mandatory
289			Type of energy genetor. Always "Wind_onshore" for this script.
290			fed_state: str, mandatory
291			Name of the state where the wind farms will be allocated
292
293			"""
294
295			def match_district_se(x):
296			for sub in hvmv_substation.index:
297			if x["geom"].contains(hvmv_substation.at[sub, "point"]):
298			return hvmv_substation.at[sub, "point"]
299
300			con = db.engine()
301			sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
302			# hvmv_substation has the information about HV transmission lines in
303			# Germany
304			hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
305
306			# Set wind potential depending on geographical location
307			power_north = 21.05 # MW/km²
308			power_south = 16.81 # MW/km²
309			# Set a maximum installed capacity to limit the power of big potential
310			# areas
311			max_power_hv = 120 # in MW
312			max_power_mv = 20 # in MW
313			# Max distance between WF (connected to MV) and nearest HV substation
314			# that allows its connection to HV.
315			max_dist_hv = 20000 # in meters
316
317			summary = pd.DataFrame(
318			columns=["state", "target", "from existin WF", "MV districts"]
319			)
320
321			north = [
322			"Schleswig-Holstein",
323			"Mecklenburg-Vorpommern",
324			"Niedersachsen",
325			"Bremen",
326			"Hamburg",
327			]
328
329			if fed_state == "DE":
330			sql = f"""SELECT * FROM boundaries.vg250_lan
331			WHERE gen in {tuple(north)}
332			"""
333			north_states = gpd.GeoDataFrame.from_postgis(
334			sql, con, geom_col="geometry"
335			)
336			north_states.to_crs(3035, inplace=True)
337			state_wf["nord"] = state_wf.within(north_states.unary_union)
338			state_wf["inst capacity [MW]"] = state_wf.apply(
339			lambda x: (
340			power_north * x["area [km²]"]
341			if x["nord"]
342			else power_south * x["area [km²]"]),
343			axis=1,
344			)
345			else:
346			if fed_state in north:
347			state_wf["inst capacity [MW]"] = (
348			power_north * state_wf["area [km²]"]
349			)
350			else:
351			state_wf["inst capacity [MW]"] = (
352			power_south * state_wf["area [km²]"]
353			)
354
355			# Divide selected areas based on voltage of connection points
356			wf_mv = state_wf[
357			(state_wf["voltage"] != "Hochspannung")
358			& (state_wf["voltage"] != "Hoechstspannung")
359			& (state_wf["voltage"] != "UmspannungZurHochspannung")
360			]
361
362			wf_hv = state_wf[
363			(state_wf["voltage"] == "Hochspannung")
364			\| (state_wf["voltage"] == "Hoechstspannung")
365			\| (state_wf["voltage"] == "UmspannungZurHochspannung")
366			]
367
368			# Wind farms connected to MV network will be connected to HV network if
369			# the distance to the closest HV substation is =< max_dist_hv, and the
370			# installed capacity is bigger than max_power_mv
371			hvmv_substation = hvmv_substation.to_crs(3035)
372			hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
373			lambda x: int(x.split(";")[0])
374			)
375			hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
376			hv_substations = hv_substations.unary_union # join all the hv_substations
377			wf_mv["dist_to_HV"] = (
378			state_wf["geom"].to_crs(3035).distance(hv_substations)
379			)
380			wf_mv_to_hv = wf_mv[
381			(wf_mv["dist_to_HV"] <= max_dist_hv)
382			& (wf_mv["inst capacity [MW]"] >= max_power_mv)
383			]
384			wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
385			wf_mv_to_hv["voltage"] = "Hochspannung"
386
387			wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
388			wf_mv = wf_mv[
389			(wf_mv["dist_to_HV"] > max_dist_hv)
390			\| (wf_mv["inst capacity [MW]"] < max_power_mv)
391			]
392			wf_mv = wf_mv.drop(columns=["dist_to_HV"])
393
394			wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
395			lambda x: x if x < max_power_hv else max_power_hv
396			)
397
398			wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
399			lambda x: x if x < max_power_mv else max_power_mv
400			)
401
402			wind_farms = pd.concat([wf_hv, wf_mv])
403
404			# Adjust the total installed capacity to the scenario
405			total_wind_power = (
406			wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
407			)
408
409			if total_wind_power > target_power:
410			scale_factor = target_power / total_wind_power
411			wf_mv["inst capacity [MW]"] = (
412			wf_mv["inst capacity [MW]"] * scale_factor
413			)
414			wf_hv["inst capacity [MW]"] = (
415			wf_hv["inst capacity [MW]"] * scale_factor
416			)
417			wind_farms = pd.concat([wf_hv, wf_mv])
418			summary = pd.concat(
419			[
420			summary,
421			pd.DataFrame(
422			index=[summary.index.max() + 1],
423			data={
424			"state": fed_state,
425			"target": target_power,
426			"from existin WF": wind_farms[
427			"inst capacity [MW]"
428			].sum(),
429			"MV districts": 0,
430			},
431			),
432			],
433			ignore_index=True,
434			)
435			else:
436			extra_wf = state_mv_districts.copy()
437			extra_wf = extra_wf.set_geometry("geom")
438			extra_wf["area [km²]"] = 0.0
439			for district in extra_wf.index:
440			try:
441			pot_area_district = gpd.clip(
442			state_wf_ni, extra_wf.at[district, "geom"]
443			)
444			extra_wf.at[district, "area [km²]"] = pot_area_district[
445			"area [km²]"
446			].sum()
447			except:
448			print(district)
449			extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
450			total_new_area = extra_wf["area [km²]"].sum()
451			scale_factor = (target_power - total_wind_power) / total_new_area
452			extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
453			extra_wf["voltage"] = "Hochspannung"
454			summary = pd.concat(
455			[
456			summary,
457			pd.DataFrame(
458			index=[summary.index.max() + 1],
459			data={
460			"state": fed_state,
461			"target": target_power,
462			"from existin WF": wind_farms[
463			"inst capacity [MW]"
464			].sum(),
465			"MV districts": extra_wf["inst capacity [MW]"].sum(),
466			},
467			),
468			],
469			ignore_index=True,
470			)
471			extra_wf.to_crs(4326, inplace=True)
472			wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)
473
474			# Use Definition of thresholds for voltage level assignment
475			wind_farms["voltage_level"] = 0
476			for i in wind_farms.index:
477			try:
478			if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
479			wind_farms.at[i, "voltage_level"] = 5
480			continue
481			if wind_farms.at[i, "inst capacity [MW]"] < 20:
482			wind_farms.at[i, "voltage_level"] = 4
483			continue
484			if wind_farms.at[i, "inst capacity [MW]"] >= 20:
485			wind_farms.at[i, "voltage_level"] = 3
486			continue
487			except:
488			print(i)
489
490			# Look for the maximum id in the table egon_power_plants
491			sql = "SELECT MAX(id) FROM supply.egon_power_plants"
492			max_id = pd.read_sql(sql, con)
493			max_id = max_id["max"].iat[0]
494			if max_id is None:
495			wind_farm_id = 1
496			else:
497			wind_farm_id = int(max_id + 1)
498
499			# write_table in egon-data database:
500
501			# Copy relevant columns from wind_farms
502			insert_wind_farms = wind_farms[
503			["inst capacity [MW]", "voltage_level", "centroid"]
504			]
505
506			# Set static column values
507			insert_wind_farms["carrier"] = source
508			insert_wind_farms["scenario"] = scenario_year
509
510			# Change name and crs of geometry column
511			insert_wind_farms = (
512			insert_wind_farms.rename(
513			{"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
514			)
515			.set_geometry("geom")
516			.to_crs(4326)
517			)
518
519			# Reset index
520			insert_wind_farms.index = pd.RangeIndex(
521			start=wind_farm_id,
522			stop=wind_farm_id + len(insert_wind_farms),
523			name="id",
524			)
525
526			# Delete old wind_onshore generators
527			db.execute_sql(
528			f"""DELETE FROM supply.egon_power_plants
529			WHERE carrier = 'wind_onshore'
530			AND scenario = '{scenario_year}'
531			"""
532			)
533
534			# Insert into database
535			insert_wind_farms.reset_index().to_postgis(
536			"egon_power_plants",
537			schema="supply",
538			con=db.engine(),
539			if_exists="append",
540			)
541			return wind_farms, summary
542
543
544			def generate_map():
545			"""Generates a map with the position of all the wind farms
546
547			Parameters
548			----------
549			*No parameters required
550
551			"""
552			con = db.engine()
553
554			# Import wind farms from egon-data
555			sql = (
556			"SELECT carrier, el_capacity, geom, scenario FROM "
557			"supply.egon_power_plants WHERE carrier = 'wind_onshore'"
558			)
559			wind_farms_t = gpd.GeoDataFrame.from_postgis(
560			sql, con, geom_col="geom", crs=4326
561			)
562			wind_farms_t = wind_farms_t.to_crs(3035)
563
564			for scenario in wind_farms_t.scenario.unique():
565			wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
566			# mv_districts has geographic info of medium voltage districts in
567			# Germany
568			sql = "SELECT geom FROM grid.egon_mv_grid_district"
569			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
570			mv_districts = mv_districts.to_crs(3035)
571
572			mv_districts["power"] = 0.0
573			for std in mv_districts.index:
574			try:
575			mv_districts.at[std, "power"] = gpd.clip(
576			wind_farms, mv_districts.at[std, "geom"]
577			).el_capacity.sum()
578			except:
579			print(std)
580
581			fig, ax = plt.subplots(1, 1)
582			mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
583			mv_districts.plot(
584			ax=ax,
585			column="power",
586			cmap="magma_r",
587			legend=True,
588			legend_kwds={
589			"label": "Installed capacity in MW",
590			"orientation": "vertical",
591			},
592			)
593			plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
594			return 0
595

openego / eGon-data

Push — dev ( 8582b4...82307e )

data.datasets.power_plants.wind_farms B

Complexity

Size/Duplication

Importance

4 Functions

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like