data.datasets.power_plants.wind_farms.insert() - Code Metrics - Inspection of "Merge pull request #1348 from openego/release-v2.0..." - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — dev ( f143c8...357bd8 )

unknown

created 2025-08-20 07:42 UTC

data.datasets.power_plants.wind_farms.insert() C

↳ Parent: data.datasets.power_plants.wind_farms

Complexity

Conditions

Size

Total Lines	118
Code Lines	72

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	72
dl	0
loc	118
rs	6.983
c	0
b	0
f	0
cc	6
nop	0

How to fix Long Method

import re

from matplotlib import pyplot as plt
from shapely.geometry import MultiPoint, Point
import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import db
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
import egon.data.config


def insert():
    """Main function. Import power objectives generate results calling the
    functions "generate_wind_farms" and  "wind_power_states".

    Parameters
    ----------
    *No parameters required

    """

    con = db.engine()

    # federal_std has the shapes of the German states
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
    federal_std = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geometry", crs=4326
    )

    # target_power_df has the expected capacity of each federal state
    sql = (
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
        "supply.egon_scenario_capacities"
    )
    target_power_df = pd.read_sql(sql, con)

    # mv_districts has geographic info of medium voltage districts in Germany
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)

    # Delete all the water bodies from the federal states shapes
    federal_std = federal_std[federal_std["gf"] == 4]
    federal_std.drop(columns=["gf"], inplace=True)
    # Filter the potential expected from wind_onshore
    target_power_df = target_power_df[
        target_power_df["carrier"] == "wind_onshore"
    ]
    target_power_df = target_power_df[
        target_power_df["scenario_name"].isin(["eGon2035", "eGon100RE"])
    ]
    target_power_df.set_index("nuts", inplace=True)
    target_power_df["geom"] = Point(0, 0)

    # Join the geometries which belong to the same states
    for std in target_power_df.index:
        df = federal_std[federal_std["nuts"] == std]
        if df.size > 0:
            target_power_df.at[std, "name"] = df["gen"].iat[0]
        else:
            target_power_df.at[std, "name"] = np.nan
        target_power_df.at[std, "geom"] = df.unary_union
    target_power_df = gpd.GeoDataFrame(
        target_power_df, geometry="geom", crs=4326
    )
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
    target_power_df = target_power_df.to_crs(3035)

    # Create the shape for full Germany
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
    target_power_df.at["DE", "name"] = "Germany"
    # Generate WFs for Germany based on potential areas and existing WFs
    wf_areas, wf_areas_ni = generate_wind_farms()

    # Change the columns "geometry" of this GeoDataFrames
    wf_areas.set_geometry("centroid", inplace=True)
    wf_areas_ni.set_geometry("centroid", inplace=True)

    # Create centroids of mv_districts to apply the clip function
    mv_districts["centroid"] = mv_districts.centroid
    mv_districts.set_geometry("centroid", inplace=True)

    summary_t = pd.DataFrame()
    farms = pd.DataFrame()

    if "eGon100RE" in target_power_df["scenario_name"].values:
        wind_farms_state, summary_state = wind_power_states(
            wf_areas,
            wf_areas_ni,
            mv_districts,
            target_power_df.at["DE", "capacity"],
            "eGon100RE",
            "wind_onshore",
            "DE",
        )
        target_power_df = target_power_df[
            target_power_df["scenario_name"] != "eGon100RE"
        ]

    if "eGon2035" in target_power_df["scenario_name"].values:
        # Fit wind farms scenarions for each one of the states
        for bundesland in target_power_df.index:
            state_wf = gpd.clip(
                wf_areas, target_power_df.at[bundesland, "geom"]
            )
            state_wf_ni = gpd.clip(
                wf_areas_ni, target_power_df.at[bundesland, "geom"]
            )
            state_mv_districts = gpd.clip(
                mv_districts, target_power_df.at[bundesland, "geom"]
            )
            target_power = target_power_df.at[bundesland, "capacity"]
            scenario_year = target_power_df.at[bundesland, "scenario_name"]
            source = target_power_df.at[bundesland, "carrier"]
            fed_state = target_power_df.at[bundesland, "name"]
            wind_farms_state, summary_state = wind_power_states(
                state_wf,
                state_wf_ni,
                state_mv_districts,
                target_power,
                scenario_year,
                source,
                fed_state,
            )
            summary_t = pd.concat([summary_t, summary_state])
            farms = pd.concat([farms, wind_farms_state])

    generate_map()

    return


def generate_wind_farms():
    """Generate wind farms based on existing wind farms.

    Parameters
    ----------
    *No parameters required

    """
    # get config
    cfg = egon.data.config.datasets()["power_plants"]

    # Due to typos in some inputs, some areas of existing wind farms
    # should be discarded using perimeter and area filters
    def filter_current_wf(wf_geometry):
        if wf_geometry.geom_type == "Point":
            return True
        if wf_geometry.geom_type == "Polygon":
            area = wf_geometry.area
            length = wf_geometry.length
            # Filter based on the biggest (# of WT) wind farm
            return (area < 40000000) & (length < 40000)
        if wf_geometry.geom_type == "LineString":
            length = wf_geometry.length
            return length < 1008  # 8 * rotor diameter (8*126m)

    # The function 'wind_farm' returns the connection point of a wind turbine
    def wind_farm(x):
        try:
            return map_ap_wea_farm[x]
        except:
            return np.nan

    # The function 'voltage' returns the voltage level a wind turbine operates
    def voltage(x):
        try:
            return map_ap_wea_voltage[x]
        except:
            return np.nan

    # Connect to the data base
    con = db.engine()
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
    # wf_areas has all the potential areas geometries for wind farms
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
    # bus has the connection points of the wind farms
    bus = pd.read_csv(
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
        index_col="MaStRNummer",
    )
    # Drop all the rows without connection point
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
    # wea has info of each wind turbine in Germany.
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])

    # Delete all the rows without information about geographical location
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
    # Delete all the offshore wind turbines
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
    # the variable map_ap_wea_farm have the connection point of all the
    # available wt in the dataframe bus.
    map_ap_wea_farm = {}
    map_ap_wea_voltage = {}

    wea["connection point"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"]
    )
    wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])

    # Create the columns 'geometry' which will have location of each WT in a
    # point type
    wea = gpd.GeoDataFrame(
        wea,
        geometry=gpd.points_from_xy(
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
        ),
    )

    # wf_size storages the number of WT connected to each connection point
    wf_size = wea["connection point"].value_counts()
    # Delete all the connection points with less than 3 WT
    wf_size = wf_size[wf_size >= 3]
    # Filter all the WT which are not part of a wind farm of at least 3 WT
    wea = wea[wea["connection point"].isin(wf_size.index)]
    # current_wfs has all the geometries that represent the existing wind
    # farms
    current_wfs = gpd.GeoDataFrame(
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
    )
    for conn_point, wt_location in wea.groupby("connection point"):
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
            wt_location["geometry"].values
        ).convex_hull
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]

    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
    # The 'filter_wts' is used to discard atypical values for the current wind
    # farms
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
        lambda x: filter_current_wf(x)
    )

    # Apply the filter based on area and perimeter
    current_wfs = current_wfs[current_wfs["filter2"]]
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])

    wf_areas["area [km²]"] = wf_areas.area / 1000000

    # Exclude areas smaller than X m². X was calculated as the area of
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
    min_area = 4 * (0.126**2) * np.sqrt(3)
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]

    # Find the centroid of all the suitable potential areas
    wf_areas["centroid"] = wf_areas.centroid

    # find the potential areas that intersects the convex hulls of current
    # wind farms and assign voltage levels
    wf_areas = wf_areas.to_crs(4326)
    for i in wf_areas.index:
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
        if intersection.any() == False:
            wf_areas.at[i, "voltage"] = "No Intersection"
        else:
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]

    # wf_areas_ni has the potential areas which don't intersect any current
    # wind farm
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
    return wf_areas, wf_areas_ni


def wind_power_states(
    state_wf,
    state_wf_ni,
    state_mv_districts,
    target_power,
    scenario_year,
    source,
    fed_state,
):
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
    database.

    Parameters
    ----------
    state_wf: geodataframe, mandatory
        gdf containing all the wf in the state created based on existing wf.
    state_wf_ni: geodataframe, mandatory
        potential areas in the the state wich don't intersect any existing wf
    state_mv_districts: geodataframe, mandatory
        gdf containing all the MV/HV substations in the state
    target_power: int, mandatory
        Objective power for a state given in MW
    scenario_year: str, mandatory
        name of the scenario
    source: str, mandatory
        Type of energy genetor. Always "Wind_onshore" for this script.
    fed_state: str, mandatory
        Name of the state where the wind farms will be allocated

    """

    def match_district_se(x):
        for sub in hvmv_substation.index:
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
                return hvmv_substation.at[sub, "point"]

    con = db.engine()
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
    # hvmv_substation has the information about HV transmission lines in
    # Germany
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")

    # Set wind potential depending on geographical location
    power_north = 21.05  # MW/km²
    power_south = 16.81  # MW/km²
    # Set a maximum installed capacity to limit the power of big potential
    # areas
    max_power_hv = 120  # in MW
    max_power_mv = 20  # in MW
    # Max distance between WF (connected to MV) and nearest HV substation
    # that allows its connection to HV.
    max_dist_hv = 20000  # in meters

    summary = pd.DataFrame(
        columns=["state", "target", "from existin WF", "MV districts"]
    )

    north = [
        "Schleswig-Holstein",
        "Mecklenburg-Vorpommern",
        "Niedersachsen",
        "Bremen",
        "Hamburg",
    ]

    if fed_state == "DE":
        sql = f"""SELECT * FROM boundaries.vg250_lan
        WHERE gen in {tuple(north)}
        """
        north_states = gpd.GeoDataFrame.from_postgis(
            sql, con, geom_col="geometry"
        )
        north_states.to_crs(3035, inplace=True)
        state_wf["nord"] = state_wf.within(north_states.unary_union)
        state_wf["inst capacity [MW]"] = state_wf.apply(
            lambda x: (
                power_north * x["area [km²]"]
                if x["nord"]
                else power_south * x["area [km²]"]
            ),
            axis=1,
        )
    else:
        if fed_state in north:
            state_wf["inst capacity [MW]"] = (
                power_north * state_wf["area [km²]"]
            )
        else:
            state_wf["inst capacity [MW]"] = (
                power_south * state_wf["area [km²]"]
            )

    # Divide selected areas based on voltage of connection points
    wf_mv = state_wf[
        (state_wf["voltage"] != "Hochspannung")
        & (state_wf["voltage"] != "Hoechstspannung")
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
    ]

    wf_hv = state_wf[
        (state_wf["voltage"] == "Hochspannung")
        | (state_wf["voltage"] == "Hoechstspannung")
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
    ]

    # Wind farms connected to MV network will be connected to HV network if
    # the distance to the closest HV substation is =< max_dist_hv, and the
    # installed capacity is bigger than max_power_mv
    hvmv_substation = hvmv_substation.to_crs(3035)
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
        lambda x: int(re.split(";|:", x)[0])
    )
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
    hv_substations = hv_substations.unary_union  # join all the hv_substations
    wf_mv["dist_to_HV"] = (
        state_wf["geom"].to_crs(3035).distance(hv_substations)
    )
    wf_mv_to_hv = wf_mv[
        (wf_mv["dist_to_HV"] <= max_dist_hv)
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
    ]
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
    wf_mv_to_hv["voltage"] = "Hochspannung"

    wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
    wf_mv = wf_mv[
        (wf_mv["dist_to_HV"] > max_dist_hv)
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
    ]
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])

    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_hv else max_power_hv
    )

    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_mv else max_power_mv
    )

    wind_farms = pd.concat([wf_hv, wf_mv])

    # Adjust the total installed capacity to the scenario
    total_wind_power = (
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
    )

    if total_wind_power > target_power:
        scale_factor = target_power / total_wind_power
        wf_mv["inst capacity [MW]"] = (
            wf_mv["inst capacity [MW]"] * scale_factor
        )
        wf_hv["inst capacity [MW]"] = (
            wf_hv["inst capacity [MW]"] * scale_factor
        )
        wind_farms = pd.concat([wf_hv, wf_mv])
        summary = pd.concat(
            [
                summary,
                pd.DataFrame(
                    index=[summary.index.max() + 1],
                    data={
                        "state": fed_state,
                        "target": target_power,
                        "from existin WF": wind_farms[
                            "inst capacity [MW]"
                        ].sum(),
                        "MV districts": 0,
                    },
                ),
            ],
            ignore_index=True,
        )
    else:
        extra_wf = state_mv_districts.copy()
        extra_wf = extra_wf.set_geometry("geom")
        extra_wf["area [km²]"] = 0.0
        for district in extra_wf.index:
            try:
                pot_area_district = gpd.clip(
                    state_wf_ni, extra_wf.at[district, "geom"]
                )
                extra_wf.at[district, "area [km²]"] = pot_area_district[
                    "area [km²]"
                ].sum()
            except:
                print(district)
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
        total_new_area = extra_wf["area [km²]"].sum()
        scale_factor = (target_power - total_wind_power) / total_new_area
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
        extra_wf["voltage"] = "Hochspannung"
        summary = pd.concat(
            [
                summary,
                pd.DataFrame(
                    index=[summary.index.max() + 1],
                    data={
                        "state": fed_state,
                        "target": target_power,
                        "from existin WF": wind_farms[
                            "inst capacity [MW]"
                        ].sum(),
                        "MV districts": extra_wf["inst capacity [MW]"].sum(),
                    },
                ),
            ],
            ignore_index=True,
        )
        extra_wf.to_crs(4326, inplace=True)
        wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)

    # Use Definition of thresholds for voltage level assignment
    wind_farms["voltage_level"] = 0
    for i in wind_farms.index:
        try:
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
                wind_farms.at[i, "voltage_level"] = 5
                continue
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
                wind_farms.at[i, "voltage_level"] = 4
                continue
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
                wind_farms.at[i, "voltage_level"] = 3
                continue
        except:
            print(i)

    # Look for the maximum id in the table egon_power_plants
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
    max_id = pd.read_sql(sql, con)
    max_id = max_id["max"].iat[0]
    if max_id is None:
        wind_farm_id = 1
    else:
        wind_farm_id = int(max_id + 1)

    # write_table in egon-data database:

    # Copy relevant columns from wind_farms
    insert_wind_farms = wind_farms[
        ["inst capacity [MW]", "voltage_level", "centroid"]
    ]

    # Set static column values
    insert_wind_farms["carrier"] = source
    insert_wind_farms["scenario"] = scenario_year

    # Change name and crs of geometry column
    insert_wind_farms = (
        insert_wind_farms.rename(
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
        )
        .set_geometry("geom")
        .to_crs(4326)
    )

    # Reset index
    insert_wind_farms.index = pd.RangeIndex(
        start=wind_farm_id,
        stop=wind_farm_id + len(insert_wind_farms),
        name="id",
    )

    # Delete old wind_onshore generators
    db.execute_sql(
        f"""DELETE FROM supply.egon_power_plants
        WHERE carrier = 'wind_onshore'
        AND scenario = '{scenario_year}'
        """
    )

    # Insert into database
    insert_wind_farms.reset_index().to_postgis(
        "egon_power_plants",
        schema="supply",
        con=db.engine(),
        if_exists="append",
    )
    return wind_farms, summary


def generate_map():
    """Generates a map with the position of all the wind farms

    Parameters
    ----------
    *No parameters required

    """
    con = db.engine()

    # Import wind farms from egon-data
    sql = (
        "SELECT  carrier, el_capacity, geom, scenario FROM "
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
    )
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geom", crs=4326
    )
    wind_farms_t = wind_farms_t.to_crs(3035)

    for scenario in wind_farms_t.scenario.unique():
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
        # mv_districts has geographic info of medium voltage districts in
        # Germany
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
        mv_districts = mv_districts.to_crs(3035)

        mv_districts["power"] = 0.0
        for std in mv_districts.index:
            try:
                mv_districts.at[std, "power"] = gpd.clip(
                    wind_farms, mv_districts.at[std, "geom"]
                ).el_capacity.sum()
            except:
                print(std)

        fig, ax = plt.subplots(1, 1)
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
        mv_districts.plot(
            ax=ax,
            column="power",
            cmap="magma_r",
            legend=True,
            legend_kwds={
                "label": "Installed capacity in MW",
                "orientation": "vertical",
            },
        )
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
    return 0


1			import re
2
3			from matplotlib import pyplot as plt
4			from shapely.geometry import MultiPoint, Point
5			import geopandas as gpd
6			import numpy as np
7			import pandas as pd
8
9			from egon.data import db
10			from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
11			import egon.data.config
12
13
14			def insert():
15			"""Main function. Import power objectives generate results calling the
16			functions "generate_wind_farms" and "wind_power_states".
17
18			Parameters
19			----------
20			*No parameters required
21
22			"""
23
24			con = db.engine()
25
26			# federal_std has the shapes of the German states
27			sql = "SELECT gen, gf, nuts, geometry FROM boundaries.vg250_lan"
28			federal_std = gpd.GeoDataFrame.from_postgis(
29			sql, con, geom_col="geometry", crs=4326
30			)
31
32			# target_power_df has the expected capacity of each federal state
33			sql = (
34			"SELECT carrier, capacity, nuts, scenario_name FROM "
35			"supply.egon_scenario_capacities"
36			)
37			target_power_df = pd.read_sql(sql, con)
38
39			# mv_districts has geographic info of medium voltage districts in Germany
40			sql = "SELECT geom FROM grid.egon_mv_grid_district"
41			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
42
43			# Delete all the water bodies from the federal states shapes
44			federal_std = federal_std[federal_std["gf"] == 4]
45			federal_std.drop(columns=["gf"], inplace=True)
46			# Filter the potential expected from wind_onshore
47			target_power_df = target_power_df[
48			target_power_df["carrier"] == "wind_onshore"
49			]
50			target_power_df = target_power_df[
51			target_power_df["scenario_name"].isin(["eGon2035", "eGon100RE"])
52			]
53			target_power_df.set_index("nuts", inplace=True)
54			target_power_df["geom"] = Point(0, 0)
55
56			# Join the geometries which belong to the same states
57			for std in target_power_df.index:
58			df = federal_std[federal_std["nuts"] == std]
59			if df.size > 0:
60			target_power_df.at[std, "name"] = df["gen"].iat[0]
61			else:
62			target_power_df.at[std, "name"] = np.nan
63			target_power_df.at[std, "geom"] = df.unary_union
64			target_power_df = gpd.GeoDataFrame(
65			target_power_df, geometry="geom", crs=4326
66			)
67			target_power_df = target_power_df[target_power_df["capacity"] > 0]
68			target_power_df = target_power_df.to_crs(3035)
69
70			# Create the shape for full Germany
71			target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
72			target_power_df.at["DE", "name"] = "Germany"
73			# Generate WFs for Germany based on potential areas and existing WFs
74			wf_areas, wf_areas_ni = generate_wind_farms()
75
76			# Change the columns "geometry" of this GeoDataFrames
77			wf_areas.set_geometry("centroid", inplace=True)
78			wf_areas_ni.set_geometry("centroid", inplace=True)
79
80			# Create centroids of mv_districts to apply the clip function
81			mv_districts["centroid"] = mv_districts.centroid
82			mv_districts.set_geometry("centroid", inplace=True)
83
84			summary_t = pd.DataFrame()
85			farms = pd.DataFrame()
86
87			if "eGon100RE" in target_power_df["scenario_name"].values:
88			wind_farms_state, summary_state = wind_power_states(
89			wf_areas,
90			wf_areas_ni,
91			mv_districts,
92			target_power_df.at["DE", "capacity"],
93			"eGon100RE",
94			"wind_onshore",
95			"DE",
96			)
97			target_power_df = target_power_df[
98			target_power_df["scenario_name"] != "eGon100RE"
99			]
100
101			if "eGon2035" in target_power_df["scenario_name"].values:
102			# Fit wind farms scenarions for each one of the states
103			for bundesland in target_power_df.index:
104			state_wf = gpd.clip(
105			wf_areas, target_power_df.at[bundesland, "geom"]
106			)
107			state_wf_ni = gpd.clip(
108			wf_areas_ni, target_power_df.at[bundesland, "geom"]
109			)
110			state_mv_districts = gpd.clip(
111			mv_districts, target_power_df.at[bundesland, "geom"]
112			)
113			target_power = target_power_df.at[bundesland, "capacity"]
114			scenario_year = target_power_df.at[bundesland, "scenario_name"]
115			source = target_power_df.at[bundesland, "carrier"]
116			fed_state = target_power_df.at[bundesland, "name"]
117			wind_farms_state, summary_state = wind_power_states(
118			state_wf,
119			state_wf_ni,
120			state_mv_districts,
121			target_power,
122			scenario_year,
123			source,
124			fed_state,
125			)
126			summary_t = pd.concat([summary_t, summary_state])
127			farms = pd.concat([farms, wind_farms_state])
128
129			generate_map()
130
131			return
132
133
134			def generate_wind_farms():
135			"""Generate wind farms based on existing wind farms.
136
137			Parameters
138			----------
139			*No parameters required
140
141			"""
142			# get config
143			cfg = egon.data.config.datasets()["power_plants"]
144
145			# Due to typos in some inputs, some areas of existing wind farms
146			# should be discarded using perimeter and area filters
147			def filter_current_wf(wf_geometry):
148			if wf_geometry.geom_type == "Point":
149			return True
150			if wf_geometry.geom_type == "Polygon":
151			area = wf_geometry.area
152			length = wf_geometry.length
153			# Filter based on the biggest (# of WT) wind farm
154			return (area < 40000000) & (length < 40000)
155			if wf_geometry.geom_type == "LineString":
156			length = wf_geometry.length
157			return length < 1008 # 8 * rotor diameter (8*126m)
158
159			# The function 'wind_farm' returns the connection point of a wind turbine
160			def wind_farm(x):
161			try:
162			return map_ap_wea_farm[x]
163			except:
164			return np.nan
165
166			# The function 'voltage' returns the voltage level a wind turbine operates
167			def voltage(x):
168			try:
169			return map_ap_wea_voltage[x]
170			except:
171			return np.nan
172
173			# Connect to the data base
174			con = db.engine()
175			sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
176			# wf_areas has all the potential areas geometries for wind farms
177			wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
178			# bus has the connection points of the wind farms
179			bus = pd.read_csv(
180			WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
181			index_col="MaStRNummer",
182			)
183			# Drop all the rows without connection point
184			bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
185			# wea has info of each wind turbine in Germany.
186			wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
187
188			# Delete all the rows without information about geographical location
189			wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
190			# Delete all the offshore wind turbines
191			wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
192			# the variable map_ap_wea_farm have the connection point of all the
193			# available wt in the dataframe bus.
194			map_ap_wea_farm = {}
195			map_ap_wea_voltage = {}
196
197			wea["connection point"] = wea["LokationMastrNummer"].map(
198			bus["NetzanschlusspunktMastrNummer"]
199			)
200			wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])
201
202			# Create the columns 'geometry' which will have location of each WT in a
203			# point type
204			wea = gpd.GeoDataFrame(
205			wea,
206			geometry=gpd.points_from_xy(
207			wea["Laengengrad"], wea["Breitengrad"], crs=4326
208			),
209			)
210
211			# wf_size storages the number of WT connected to each connection point
212			wf_size = wea["connection point"].value_counts()
213			# Delete all the connection points with less than 3 WT
214			wf_size = wf_size[wf_size >= 3]
215			# Filter all the WT which are not part of a wind farm of at least 3 WT
216			wea = wea[wea["connection point"].isin(wf_size.index)]
217			# current_wfs has all the geometries that represent the existing wind
218			# farms
219			current_wfs = gpd.GeoDataFrame(
220			index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
221			)
222			for conn_point, wt_location in wea.groupby("connection point"):
223			current_wfs.at[conn_point, "geometry"] = MultiPoint(
224			wt_location["geometry"].values
225			).convex_hull
226			current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
227
228			current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
229			current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
230			current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
231			# The 'filter_wts' is used to discard atypical values for the current wind
232			# farms
233			current_wfs["filter2"] = current_wfs["geometry2"].apply(
234			lambda x: filter_current_wf(x)
235			)
236
237			# Apply the filter based on area and perimeter
238			current_wfs = current_wfs[current_wfs["filter2"]]
239			current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
240
241			wf_areas["area [km²]"] = wf_areas.area / 1000000
242
243			# Exclude areas smaller than X m². X was calculated as the area of
244			# 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
245			min_area = 4 * (0.126*2) np.sqrt(3)
246			wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
247
248			# Find the centroid of all the suitable potential areas
249			wf_areas["centroid"] = wf_areas.centroid
250
251			# find the potential areas that intersects the convex hulls of current
252			# wind farms and assign voltage levels
253			wf_areas = wf_areas.to_crs(4326)
254			for i in wf_areas.index:
255			intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
256			if intersection.any() == False:
257			wf_areas.at[i, "voltage"] = "No Intersection"
258			else:
259			wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
260
261			# wf_areas_ni has the potential areas which don't intersect any current
262			# wind farm
263			wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
264			wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
265			return wf_areas, wf_areas_ni
266
267
268			def wind_power_states(
269			state_wf,
270			state_wf_ni,
271			state_mv_districts,
272			target_power,
273			scenario_year,
274			source,
275			fed_state,
276			):
277			"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
278			database.
279
280			Parameters
281			----------
282			state_wf: geodataframe, mandatory
283			gdf containing all the wf in the state created based on existing wf.
284			state_wf_ni: geodataframe, mandatory
285			potential areas in the the state wich don't intersect any existing wf
286			state_mv_districts: geodataframe, mandatory
287			gdf containing all the MV/HV substations in the state
288			target_power: int, mandatory
289			Objective power for a state given in MW
290			scenario_year: str, mandatory
291			name of the scenario
292			source: str, mandatory
293			Type of energy genetor. Always "Wind_onshore" for this script.
294			fed_state: str, mandatory
295			Name of the state where the wind farms will be allocated
296
297			"""
298
299			def match_district_se(x):
300			for sub in hvmv_substation.index:
301			if x["geom"].contains(hvmv_substation.at[sub, "point"]):
302			return hvmv_substation.at[sub, "point"]
303
304			con = db.engine()
305			sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
306			# hvmv_substation has the information about HV transmission lines in
307			# Germany
308			hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
309
310			# Set wind potential depending on geographical location
311			power_north = 21.05 # MW/km²
312			power_south = 16.81 # MW/km²
313			# Set a maximum installed capacity to limit the power of big potential
314			# areas
315			max_power_hv = 120 # in MW
316			max_power_mv = 20 # in MW
317			# Max distance between WF (connected to MV) and nearest HV substation
318			# that allows its connection to HV.
319			max_dist_hv = 20000 # in meters
320
321			summary = pd.DataFrame(
322			columns=["state", "target", "from existin WF", "MV districts"]
323			)
324
325			north = [
326			"Schleswig-Holstein",
327			"Mecklenburg-Vorpommern",
328			"Niedersachsen",
329			"Bremen",
330			"Hamburg",
331			]
332
333			if fed_state == "DE":
334			sql = f"""SELECT * FROM boundaries.vg250_lan
335			WHERE gen in {tuple(north)}
336			"""
337			north_states = gpd.GeoDataFrame.from_postgis(
338			sql, con, geom_col="geometry"
339			)
340			north_states.to_crs(3035, inplace=True)
341			state_wf["nord"] = state_wf.within(north_states.unary_union)
342			state_wf["inst capacity [MW]"] = state_wf.apply(
343			lambda x: (
344			power_north * x["area [km²]"]
345			if x["nord"]
346			else power_south * x["area [km²]"]
347			),
348			axis=1,
349			)
350			else:
351			if fed_state in north:
352			state_wf["inst capacity [MW]"] = (
353			power_north * state_wf["area [km²]"]
354			)
355			else:
356			state_wf["inst capacity [MW]"] = (
357			power_south * state_wf["area [km²]"]
358			)
359
360			# Divide selected areas based on voltage of connection points
361			wf_mv = state_wf[
362			(state_wf["voltage"] != "Hochspannung")
363			& (state_wf["voltage"] != "Hoechstspannung")
364			& (state_wf["voltage"] != "UmspannungZurHochspannung")
365			]
366
367			wf_hv = state_wf[
368			(state_wf["voltage"] == "Hochspannung")
369			\| (state_wf["voltage"] == "Hoechstspannung")
370			\| (state_wf["voltage"] == "UmspannungZurHochspannung")
371			]
372
373			# Wind farms connected to MV network will be connected to HV network if
374			# the distance to the closest HV substation is =< max_dist_hv, and the
375			# installed capacity is bigger than max_power_mv
376			hvmv_substation = hvmv_substation.to_crs(3035)
377			hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
378			lambda x: int(re.split(";\|:", x)[0])
379			)
380			hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
381			hv_substations = hv_substations.unary_union # join all the hv_substations
382			wf_mv["dist_to_HV"] = (
383			state_wf["geom"].to_crs(3035).distance(hv_substations)
384			)
385			wf_mv_to_hv = wf_mv[
386			(wf_mv["dist_to_HV"] <= max_dist_hv)
387			& (wf_mv["inst capacity [MW]"] >= max_power_mv)
388			]
389			wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
390			wf_mv_to_hv["voltage"] = "Hochspannung"
391
392			wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
393			wf_mv = wf_mv[
394			(wf_mv["dist_to_HV"] > max_dist_hv)
395			\| (wf_mv["inst capacity [MW]"] < max_power_mv)
396			]
397			wf_mv = wf_mv.drop(columns=["dist_to_HV"])
398
399			wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
400			lambda x: x if x < max_power_hv else max_power_hv
401			)
402
403			wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
404			lambda x: x if x < max_power_mv else max_power_mv
405			)
406
407			wind_farms = pd.concat([wf_hv, wf_mv])
408
409			# Adjust the total installed capacity to the scenario
410			total_wind_power = (
411			wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
412			)
413
414			if total_wind_power > target_power:
415			scale_factor = target_power / total_wind_power
416			wf_mv["inst capacity [MW]"] = (
417			wf_mv["inst capacity [MW]"] * scale_factor
418			)
419			wf_hv["inst capacity [MW]"] = (
420			wf_hv["inst capacity [MW]"] * scale_factor
421			)
422			wind_farms = pd.concat([wf_hv, wf_mv])
423			summary = pd.concat(
424			[
425			summary,
426			pd.DataFrame(
427			index=[summary.index.max() + 1],
428			data={
429			"state": fed_state,
430			"target": target_power,
431			"from existin WF": wind_farms[
432			"inst capacity [MW]"
433			].sum(),
434			"MV districts": 0,
435			},
436			),
437			],
438			ignore_index=True,
439			)
440			else:
441			extra_wf = state_mv_districts.copy()
442			extra_wf = extra_wf.set_geometry("geom")
443			extra_wf["area [km²]"] = 0.0
444			for district in extra_wf.index:
445			try:
446			pot_area_district = gpd.clip(
447			state_wf_ni, extra_wf.at[district, "geom"]
448			)
449			extra_wf.at[district, "area [km²]"] = pot_area_district[
450			"area [km²]"
451			].sum()
452			except:
453			print(district)
454			extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
455			total_new_area = extra_wf["area [km²]"].sum()
456			scale_factor = (target_power - total_wind_power) / total_new_area
457			extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
458			extra_wf["voltage"] = "Hochspannung"
459			summary = pd.concat(
460			[
461			summary,
462			pd.DataFrame(
463			index=[summary.index.max() + 1],
464			data={
465			"state": fed_state,
466			"target": target_power,
467			"from existin WF": wind_farms[
468			"inst capacity [MW]"
469			].sum(),
470			"MV districts": extra_wf["inst capacity [MW]"].sum(),
471			},
472			),
473			],
474			ignore_index=True,
475			)
476			extra_wf.to_crs(4326, inplace=True)
477			wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)
478
479			# Use Definition of thresholds for voltage level assignment
480			wind_farms["voltage_level"] = 0
481			for i in wind_farms.index:
482			try:
483			if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
484			wind_farms.at[i, "voltage_level"] = 5
485			continue
486			if wind_farms.at[i, "inst capacity [MW]"] < 20:
487			wind_farms.at[i, "voltage_level"] = 4
488			continue
489			if wind_farms.at[i, "inst capacity [MW]"] >= 20:
490			wind_farms.at[i, "voltage_level"] = 3
491			continue
492			except:
493			print(i)
494
495			# Look for the maximum id in the table egon_power_plants
496			sql = "SELECT MAX(id) FROM supply.egon_power_plants"
497			max_id = pd.read_sql(sql, con)
498			max_id = max_id["max"].iat[0]
499			if max_id is None:
500			wind_farm_id = 1
501			else:
502			wind_farm_id = int(max_id + 1)
503
504			# write_table in egon-data database:
505
506			# Copy relevant columns from wind_farms
507			insert_wind_farms = wind_farms[
508			["inst capacity [MW]", "voltage_level", "centroid"]
509			]
510
511			# Set static column values
512			insert_wind_farms["carrier"] = source
513			insert_wind_farms["scenario"] = scenario_year
514
515			# Change name and crs of geometry column
516			insert_wind_farms = (
517			insert_wind_farms.rename(
518			{"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
519			)
520			.set_geometry("geom")
521			.to_crs(4326)
522			)
523
524			# Reset index
525			insert_wind_farms.index = pd.RangeIndex(
526			start=wind_farm_id,
527			stop=wind_farm_id + len(insert_wind_farms),
528			name="id",
529			)
530
531			# Delete old wind_onshore generators
532			db.execute_sql(
533			f"""DELETE FROM supply.egon_power_plants
534			WHERE carrier = 'wind_onshore'
535			AND scenario = '{scenario_year}'
536			"""
537			)
538
539			# Insert into database
540			insert_wind_farms.reset_index().to_postgis(
541			"egon_power_plants",
542			schema="supply",
543			con=db.engine(),
544			if_exists="append",
545			)
546			return wind_farms, summary
547
548
549			def generate_map():
550			"""Generates a map with the position of all the wind farms
551
552			Parameters
553			----------
554			*No parameters required
555
556			"""
557			con = db.engine()
558
559			# Import wind farms from egon-data
560			sql = (
561			"SELECT carrier, el_capacity, geom, scenario FROM "
562			"supply.egon_power_plants WHERE carrier = 'wind_onshore'"
563			)
564			wind_farms_t = gpd.GeoDataFrame.from_postgis(
565			sql, con, geom_col="geom", crs=4326
566			)
567			wind_farms_t = wind_farms_t.to_crs(3035)
568
569			for scenario in wind_farms_t.scenario.unique():
570			wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
571			# mv_districts has geographic info of medium voltage districts in
572			# Germany
573			sql = "SELECT geom FROM grid.egon_mv_grid_district"
574			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
575			mv_districts = mv_districts.to_crs(3035)
576
577			mv_districts["power"] = 0.0
578			for std in mv_districts.index:
579			try:
580			mv_districts.at[std, "power"] = gpd.clip(
581			wind_farms, mv_districts.at[std, "geom"]
582			).el_capacity.sum()
583			except:
584			print(std)
585
586			fig, ax = plt.subplots(1, 1)
587			mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
588			mv_districts.plot(
589			ax=ax,
590			column="power",
591			cmap="magma_r",
592			legend=True,
593			legend_kwds={
594			"label": "Installed capacity in MW",
595			"orientation": "vertical",
596			},
597			)
598			plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
599			return 0
600

openego / eGon-data

Push — dev ( f143c8...357bd8 )

data.datasets.power_plants.wind_farms.insert() C

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like