data.datasets.power_plants.wind_farms - Code Metrics - Inspection of "Fixes/#1190 profile building allocation wrong cell..." - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1197)

unknown

created 2025-03-13 22:39 UTC

data.datasets.power_plants.wind_farms B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	592
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	43
eloc	352
dl	0
loc	592
rs	8.96
c	0
b	0
f	0

4 Functions

Rating	Name	Size	Complexity
B	insert()	113	6
F	wind_power_states()	278	21
A	generate_map()	51	4
D	generate_wind_farms()	132	12

How to fix Complexity

from matplotlib import pyplot as plt
from shapely.geometry import MultiPoint, Point
import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import db
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
import egon.data.config


def insert():
    """Main function. Import power objectives generate results calling the
    functions "generate_wind_farms" and  "wind_power_states".

    Parameters
    ----------
    *No parameters required

    """

    con = db.engine()

    # federal_std has the shapes of the German states
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
    federal_std = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geometry", crs=4326
    )

    # target_power_df has the expected capacity of each federal state
    sql = (
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
        "supply.egon_scenario_capacities"
    )
    target_power_df = pd.read_sql(sql, con)

    # mv_districts has geographic info of medium voltage districts in Germany
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)

    # Delete all the water bodies from the federal states shapes
    federal_std = federal_std[federal_std["gf"] == 4]
    federal_std.drop(columns=["gf"], inplace=True)
    # Filter the potential expected from wind_onshore
    target_power_df = target_power_df[
        target_power_df["carrier"] == "wind_onshore"
    ]
    target_power_df.set_index("nuts", inplace=True)
    target_power_df["geom"] = Point(0, 0)

    # Join the geometries which belong to the same states
    for std in target_power_df.index:
        df = federal_std[federal_std["nuts"] == std]
        if df.size > 0:
            target_power_df.at[std, "name"] = df["gen"].iat[0]
        else:
            target_power_df.at[std, "name"] = np.nan
        target_power_df.at[std, "geom"] = df.unary_union
    target_power_df = gpd.GeoDataFrame(
        target_power_df, geometry="geom", crs=4326
    )
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
    target_power_df = target_power_df.to_crs(3035)

    # Create the shape for full Germany
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
    target_power_df.at["DE", "name"] = "Germany"
    # Generate WFs for Germany based on potential areas and existing WFs
    wf_areas, wf_areas_ni = generate_wind_farms()

    # Change the columns "geometry" of this GeoDataFrames
    wf_areas.set_geometry("centroid", inplace=True)
    wf_areas_ni.set_geometry("centroid", inplace=True)

    # Create centroids of mv_districts to apply the clip function
    mv_districts["centroid"] = mv_districts.centroid
    mv_districts.set_geometry("centroid", inplace=True)

    summary_t = pd.DataFrame()
    farms = pd.DataFrame()

    if "eGon100RE" in target_power_df["scenario_name"].values:
        wind_farms_state, summary_state = wind_power_states(
            wf_areas,
            wf_areas_ni,
            mv_districts,
            target_power_df["capacity"].values[0],
            "eGon100RE",
            "wind_onshore",
            "DE",
        )
        target_power_df = target_power_df[
            target_power_df["scenario_name"] != "eGon100RE"
        ]

    if "eGon2035" in target_power_df["scenario_name"].values:
        # Fit wind farms scenarions for each one of the states
        for bundesland in target_power_df.index:
            state_wf = gpd.clip(wf_areas, target_power_df.at[bundesland, "geom"])
            state_wf_ni = gpd.clip(
                wf_areas_ni, target_power_df.at[bundesland, "geom"]
            )
            state_mv_districts = gpd.clip(
                mv_districts, target_power_df.at[bundesland, "geom"]
            )
            target_power = target_power_df.at[bundesland, "capacity"]
            scenario_year = target_power_df.at[bundesland, "scenario_name"]
            source = target_power_df.at[bundesland, "carrier"]
            fed_state = target_power_df.at[bundesland, "name"]
            wind_farms_state, summary_state = wind_power_states(
                state_wf,
                state_wf_ni,
                state_mv_districts,
                target_power,
                scenario_year,
                source,
                fed_state,
            )
            summary_t = pd.concat([summary_t, summary_state])
            farms = pd.concat([farms, wind_farms_state])

    generate_map()

    return


def generate_wind_farms():
    """Generate wind farms based on existing wind farms.

    Parameters
    ----------
    *No parameters required

    """
    # get config
    cfg = egon.data.config.datasets()["power_plants"]

    # Due to typos in some inputs, some areas of existing wind farms
    # should be discarded using perimeter and area filters
    def filter_current_wf(wf_geometry):
        if wf_geometry.geom_type == "Point":
            return True
        if wf_geometry.geom_type == "Polygon":
            area = wf_geometry.area
            length = wf_geometry.length
            # Filter based on the biggest (# of WT) wind farm
            return (area < 40000000) & (length < 40000)
        if wf_geometry.geom_type == "LineString":
            length = wf_geometry.length
            return length < 1008  # 8 * rotor diameter (8*126m)

    # The function 'wind_farm' returns the connection point of a wind turbine
    def wind_farm(x):
        try:
            return map_ap_wea_farm[x]
        except:
            return np.nan

    # The function 'voltage' returns the voltage level a wind turbine operates
    def voltage(x):
        try:
            return map_ap_wea_voltage[x]
        except:
            return np.nan

    # Connect to the data base
    con = db.engine()
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
    # wf_areas has all the potential areas geometries for wind farms
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
    # bus has the connection points of the wind farms
    bus = pd.read_csv(
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
        index_col="MaStRNummer",
    )
    # Drop all the rows without connection point
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
    # wea has info of each wind turbine in Germany.
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])

    # Delete all the rows without information about geographical location
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
    # Delete all the offshore wind turbines
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
    # the variable map_ap_wea_farm have the connection point of all the
    # available wt in the dataframe bus.
    map_ap_wea_farm = {}
    map_ap_wea_voltage = {}

    wea["connection point"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"]
    )
    wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])

    # Create the columns 'geometry' which will have location of each WT in a
    # point type
    wea = gpd.GeoDataFrame(
        wea,
        geometry=gpd.points_from_xy(
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
        ),
    )

    # wf_size storages the number of WT connected to each connection point
    wf_size = wea["connection point"].value_counts()
    # Delete all the connection points with less than 3 WT
    wf_size = wf_size[wf_size >= 3]
    # Filter all the WT which are not part of a wind farm of at least 3 WT
    wea = wea[wea["connection point"].isin(wf_size.index)]
    # current_wfs has all the geometries that represent the existing wind
    # farms
    current_wfs = gpd.GeoDataFrame(
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
    )
    for conn_point, wt_location in wea.groupby("connection point"):
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
            wt_location["geometry"].values
        ).convex_hull
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]

    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
    # The 'filter_wts' is used to discard atypical values for the current wind
    # farms
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
        lambda x: filter_current_wf(x)
    )

    # Apply the filter based on area and perimeter
    current_wfs = current_wfs[current_wfs["filter2"]]
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])

    wf_areas["area [km²]"] = wf_areas.area / 1000000

    # Exclude areas smaller than X m². X was calculated as the area of
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
    min_area = 4 * (0.126**2) * np.sqrt(3)
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]

    # Find the centroid of all the suitable potential areas
    wf_areas["centroid"] = wf_areas.centroid

    # find the potential areas that intersects the convex hulls of current
    # wind farms and assign voltage levels
    wf_areas = wf_areas.to_crs(4326)
    for i in wf_areas.index:
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
        if intersection.any() == False:
            wf_areas.at[i, "voltage"] = "No Intersection"
        else:
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]

    # wf_areas_ni has the potential areas which don't intersect any current
    # wind farm
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
    return wf_areas, wf_areas_ni


def wind_power_states(
    state_wf,
    state_wf_ni,
    state_mv_districts,
    target_power,
    scenario_year,
    source,
    fed_state,
):
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
    database.

    Parameters
    ----------
    state_wf: geodataframe, mandatory
        gdf containing all the wf in the state created based on existing wf.
    state_wf_ni: geodataframe, mandatory
        potential areas in the the state wich don't intersect any existing wf
    state_mv_districts: geodataframe, mandatory
        gdf containing all the MV/HV substations in the state
    target_power: int, mandatory
        Objective power for a state given in MW
    scenario_year: str, mandatory
        name of the scenario
    source: str, mandatory
        Type of energy genetor. Always "Wind_onshore" for this script.
    fed_state: str, mandatory
        Name of the state where the wind farms will be allocated

    """

    def match_district_se(x):
        for sub in hvmv_substation.index:
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
                return hvmv_substation.at[sub, "point"]

    con = db.engine()
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
    # hvmv_substation has the information about HV transmission lines in
    # Germany
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")

    # Set wind potential depending on geographical location
    power_north = 21.05  # MW/km²
    power_south = 16.81  # MW/km²
    # Set a maximum installed capacity to limit the power of big potential
    # areas
    max_power_hv = 120  # in MW
    max_power_mv = 20  # in MW
    # Max distance between WF (connected to MV) and nearest HV substation
    # that allows its connection to HV.
    max_dist_hv = 20000  # in meters

    summary = pd.DataFrame(
        columns=["state", "target", "from existin WF", "MV districts"]
    )

    north = [
        "Schleswig-Holstein",
        "Mecklenburg-Vorpommern",
        "Niedersachsen",
        "Bremen",
        "Hamburg",
    ]

    if fed_state == "DE":
        sql = f"""SELECT * FROM boundaries.vg250_lan
        WHERE gen in {tuple(north)}
        """
        north_states = gpd.GeoDataFrame.from_postgis(
            sql, con, geom_col="geometry"
        )
        north_states.to_crs(3035, inplace=True)
        state_wf["nord"] = state_wf.within(north_states.unary_union)
        state_wf["inst capacity [MW]"] = state_wf.apply(
            lambda x: (
                power_north * x["area [km²]"]
                if x["nord"]
                else power_south * x["area [km²]"]),
            axis=1,
        )
    else:
        if fed_state in north:
            state_wf["inst capacity [MW]"] = (
                power_north * state_wf["area [km²]"]
            )
        else:
            state_wf["inst capacity [MW]"] = (
                power_south * state_wf["area [km²]"]
            )

    # Divide selected areas based on voltage of connection points
    wf_mv = state_wf[
        (state_wf["voltage"] != "Hochspannung")
        & (state_wf["voltage"] != "Hoechstspannung")
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
    ]

    wf_hv = state_wf[
        (state_wf["voltage"] == "Hochspannung")
        | (state_wf["voltage"] == "Hoechstspannung")
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
    ]

    # Wind farms connected to MV network will be connected to HV network if
    # the distance to the closest HV substation is =< max_dist_hv, and the
    # installed capacity is bigger than max_power_mv
    hvmv_substation = hvmv_substation.to_crs(3035)
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
        lambda x: int(x.split(";")[0])
    )
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
    hv_substations = hv_substations.unary_union  # join all the hv_substations
    wf_mv["dist_to_HV"] = (
        state_wf["geom"].to_crs(3035).distance(hv_substations)
    )
    wf_mv_to_hv = wf_mv[
        (wf_mv["dist_to_HV"] <= max_dist_hv)
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
    ]
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
    wf_mv_to_hv["voltage"] = "Hochspannung"

    wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
    wf_mv = wf_mv[
        (wf_mv["dist_to_HV"] > max_dist_hv)
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
    ]
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])

    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_hv else max_power_hv
    )

    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_mv else max_power_mv
    )

    wind_farms = pd.concat([wf_hv, wf_mv])

    # Adjust the total installed capacity to the scenario
    total_wind_power = (
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
    )

    if total_wind_power > target_power:
        scale_factor = target_power / total_wind_power
        wf_mv["inst capacity [MW]"] = (
            wf_mv["inst capacity [MW]"] * scale_factor
        )
        wf_hv["inst capacity [MW]"] = (
            wf_hv["inst capacity [MW]"] * scale_factor
        )
        wind_farms = pd.concat([wf_hv, wf_mv])
        summary = pd.concat(
            [
                summary,
                pd.DataFrame(
                    index=[summary.index.max() + 1],
                    data={
                        "state": fed_state,
                        "target": target_power,
                        "from existin WF": wind_farms[
                            "inst capacity [MW]"
                        ].sum(),
                        "MV districts": 0,
                    },
                ),
            ],
            ignore_index=True,
        )
    else:
        extra_wf = state_mv_districts.copy()
        extra_wf = extra_wf.set_geometry("geom")
        extra_wf["area [km²]"] = 0.0
        for district in extra_wf.index:
            try:
                pot_area_district = gpd.clip(
                    state_wf_ni, extra_wf.at[district, "geom"]
                )
                extra_wf.at[district, "area [km²]"] = pot_area_district[
                    "area [km²]"
                ].sum()
            except:
                print(district)
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
        total_new_area = extra_wf["area [km²]"].sum()
        scale_factor = (target_power - total_wind_power) / total_new_area
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
        extra_wf["voltage"] = "Hochspannung"
        summary = pd.concat(
            [
                summary,
                pd.DataFrame(
                    index=[summary.index.max() + 1],
                    data={
                        "state": fed_state,
                        "target": target_power,
                        "from existin WF": wind_farms[
                            "inst capacity [MW]"
                        ].sum(),
                        "MV districts": extra_wf["inst capacity [MW]"].sum(),
                    },
                ),
            ],
            ignore_index=True,
        )
        extra_wf.to_crs(4326, inplace=True)
        wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)

    # Use Definition of thresholds for voltage level assignment
    wind_farms["voltage_level"] = 0
    for i in wind_farms.index:
        try:
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
                wind_farms.at[i, "voltage_level"] = 5
                continue
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
                wind_farms.at[i, "voltage_level"] = 4
                continue
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
                wind_farms.at[i, "voltage_level"] = 3
                continue
        except:
            print(i)

    # Look for the maximum id in the table egon_power_plants
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
    max_id = pd.read_sql(sql, con)
    max_id = max_id["max"].iat[0]
    if max_id is None:
        wind_farm_id = 1
    else:
        wind_farm_id = int(max_id + 1)

    # write_table in egon-data database:

    # Copy relevant columns from wind_farms
    insert_wind_farms = wind_farms[
        ["inst capacity [MW]", "voltage_level", "centroid"]
    ]

    # Set static column values
    insert_wind_farms["carrier"] = source
    insert_wind_farms["scenario"] = scenario_year

    # Change name and crs of geometry column
    insert_wind_farms = (
        insert_wind_farms.rename(
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
        )
        .set_geometry("geom")
        .to_crs(4326)
    )

    # Reset index
    insert_wind_farms.index = pd.RangeIndex(
        start=wind_farm_id,
        stop=wind_farm_id + len(insert_wind_farms),
        name="id",
    )

    # Delete old wind_onshore generators
    db.execute_sql(
        f"""DELETE FROM supply.egon_power_plants
        WHERE carrier = 'wind_onshore'
        AND scenario = '{scenario_year}'
        """
    )

    # Insert into database
    insert_wind_farms.reset_index().to_postgis(
        "egon_power_plants",
        schema="supply",
        con=db.engine(),
        if_exists="append",
    )
    return wind_farms, summary


def generate_map():
    """Generates a map with the position of all the wind farms

    Parameters
    ----------
    *No parameters required

    """
    con = db.engine()

    # Import wind farms from egon-data
    sql = (
        "SELECT  carrier, el_capacity, geom, scenario FROM "
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
    )
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geom", crs=4326
    )
    wind_farms_t = wind_farms_t.to_crs(3035)

    for scenario in wind_farms_t.scenario.unique():
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
        # mv_districts has geographic info of medium voltage districts in
        # Germany
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
        mv_districts = mv_districts.to_crs(3035)

        mv_districts["power"] = 0.0
        for std in mv_districts.index:
            try:
                mv_districts.at[std, "power"] = gpd.clip(
                    wind_farms, mv_districts.at[std, "geom"]
                ).el_capacity.sum()
            except:
                print(std)

        fig, ax = plt.subplots(1, 1)
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
        mv_districts.plot(
            ax=ax,
            column="power",
            cmap="magma_r",
            legend=True,
            legend_kwds={
                "label": "Installed capacity in MW",
                "orientation": "vertical",
            },
        )
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
    return 0


1			from matplotlib import pyplot as plt
2			from shapely.geometry import MultiPoint, Point
3			import geopandas as gpd
4			import numpy as np
5			import pandas as pd
6
7			from egon.data import db
8			from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
9			import egon.data.config
10
11
12			def insert():
13			"""Main function. Import power objectives generate results calling the
14			functions "generate_wind_farms" and "wind_power_states".
15
16			Parameters
17			----------
18			*No parameters required
19
20			"""
21
22			con = db.engine()
23
24			# federal_std has the shapes of the German states
25			sql = "SELECT gen, gf, nuts, geometry FROM boundaries.vg250_lan"
26			federal_std = gpd.GeoDataFrame.from_postgis(
27			sql, con, geom_col="geometry", crs=4326
28			)
29
30			# target_power_df has the expected capacity of each federal state
31			sql = (
32			"SELECT carrier, capacity, nuts, scenario_name FROM "
33			"supply.egon_scenario_capacities"
34			)
35			target_power_df = pd.read_sql(sql, con)
36
37			# mv_districts has geographic info of medium voltage districts in Germany
38			sql = "SELECT geom FROM grid.egon_mv_grid_district"
39			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
40
41			# Delete all the water bodies from the federal states shapes
42			federal_std = federal_std[federal_std["gf"] == 4]
43			federal_std.drop(columns=["gf"], inplace=True)
44			# Filter the potential expected from wind_onshore
45			target_power_df = target_power_df[
46			target_power_df["carrier"] == "wind_onshore"
47			]
48			target_power_df.set_index("nuts", inplace=True)
49			target_power_df["geom"] = Point(0, 0)
50
51			# Join the geometries which belong to the same states
52			for std in target_power_df.index:
53			df = federal_std[federal_std["nuts"] == std]
54			if df.size > 0:
55			target_power_df.at[std, "name"] = df["gen"].iat[0]
56			else:
57			target_power_df.at[std, "name"] = np.nan
58			target_power_df.at[std, "geom"] = df.unary_union
59			target_power_df = gpd.GeoDataFrame(
60			target_power_df, geometry="geom", crs=4326
61			)
62			target_power_df = target_power_df[target_power_df["capacity"] > 0]
63			target_power_df = target_power_df.to_crs(3035)
64
65			# Create the shape for full Germany
66			target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
67			target_power_df.at["DE", "name"] = "Germany"
68			# Generate WFs for Germany based on potential areas and existing WFs
69			wf_areas, wf_areas_ni = generate_wind_farms()
70
71			# Change the columns "geometry" of this GeoDataFrames
72			wf_areas.set_geometry("centroid", inplace=True)
73			wf_areas_ni.set_geometry("centroid", inplace=True)
74
75			# Create centroids of mv_districts to apply the clip function
76			mv_districts["centroid"] = mv_districts.centroid
77			mv_districts.set_geometry("centroid", inplace=True)
78
79			summary_t = pd.DataFrame()
80			farms = pd.DataFrame()
81
82			if "eGon100RE" in target_power_df["scenario_name"].values:
83			wind_farms_state, summary_state = wind_power_states(
84			wf_areas,
85			wf_areas_ni,
86			mv_districts,
87			target_power_df["capacity"].values[0],
88			"eGon100RE",
89			"wind_onshore",
90			"DE",
91			)
92			target_power_df = target_power_df[
93			target_power_df["scenario_name"] != "eGon100RE"
94			]
95
96			if "eGon2035" in target_power_df["scenario_name"].values:
97			# Fit wind farms scenarions for each one of the states
98			for bundesland in target_power_df.index:
99			state_wf = gpd.clip(wf_areas, target_power_df.at[bundesland, "geom"])
100			state_wf_ni = gpd.clip(
101			wf_areas_ni, target_power_df.at[bundesland, "geom"]
102			)
103			state_mv_districts = gpd.clip(
104			mv_districts, target_power_df.at[bundesland, "geom"]
105			)
106			target_power = target_power_df.at[bundesland, "capacity"]
107			scenario_year = target_power_df.at[bundesland, "scenario_name"]
108			source = target_power_df.at[bundesland, "carrier"]
109			fed_state = target_power_df.at[bundesland, "name"]
110			wind_farms_state, summary_state = wind_power_states(
111			state_wf,
112			state_wf_ni,
113			state_mv_districts,
114			target_power,
115			scenario_year,
116			source,
117			fed_state,
118			)
119			summary_t = pd.concat([summary_t, summary_state])
120			farms = pd.concat([farms, wind_farms_state])
121
122			generate_map()
123
124			return
125
126
127			def generate_wind_farms():
128			"""Generate wind farms based on existing wind farms.
129
130			Parameters
131			----------
132			*No parameters required
133
134			"""
135			# get config
136			cfg = egon.data.config.datasets()["power_plants"]
137
138			# Due to typos in some inputs, some areas of existing wind farms
139			# should be discarded using perimeter and area filters
140			def filter_current_wf(wf_geometry):
141			if wf_geometry.geom_type == "Point":
142			return True
143			if wf_geometry.geom_type == "Polygon":
144			area = wf_geometry.area
145			length = wf_geometry.length
146			# Filter based on the biggest (# of WT) wind farm
147			return (area < 40000000) & (length < 40000)
148			if wf_geometry.geom_type == "LineString":
149			length = wf_geometry.length
150			return length < 1008 # 8 * rotor diameter (8*126m)
151
152			# The function 'wind_farm' returns the connection point of a wind turbine
153			def wind_farm(x):
154			try:
155			return map_ap_wea_farm[x]
156			except:
157			return np.nan
158
159			# The function 'voltage' returns the voltage level a wind turbine operates
160			def voltage(x):
161			try:
162			return map_ap_wea_voltage[x]
163			except:
164			return np.nan
165
166			# Connect to the data base
167			con = db.engine()
168			sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
169			# wf_areas has all the potential areas geometries for wind farms
170			wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
171			# bus has the connection points of the wind farms
172			bus = pd.read_csv(
173			WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
174			index_col="MaStRNummer",
175			)
176			# Drop all the rows without connection point
177			bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
178			# wea has info of each wind turbine in Germany.
179			wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
180
181			# Delete all the rows without information about geographical location
182			wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
183			# Delete all the offshore wind turbines
184			wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
185			# the variable map_ap_wea_farm have the connection point of all the
186			# available wt in the dataframe bus.
187			map_ap_wea_farm = {}
188			map_ap_wea_voltage = {}
189
190			wea["connection point"] = wea["LokationMastrNummer"].map(
191			bus["NetzanschlusspunktMastrNummer"]
192			)
193			wea["voltage"] = wea["LokationMastrNummer"].map(bus["Spannungsebene"])
194
195			# Create the columns 'geometry' which will have location of each WT in a
196			# point type
197			wea = gpd.GeoDataFrame(
198			wea,
199			geometry=gpd.points_from_xy(
200			wea["Laengengrad"], wea["Breitengrad"], crs=4326
201			),
202			)
203
204			# wf_size storages the number of WT connected to each connection point
205			wf_size = wea["connection point"].value_counts()
206			# Delete all the connection points with less than 3 WT
207			wf_size = wf_size[wf_size >= 3]
208			# Filter all the WT which are not part of a wind farm of at least 3 WT
209			wea = wea[wea["connection point"].isin(wf_size.index)]
210			# current_wfs has all the geometries that represent the existing wind
211			# farms
212			current_wfs = gpd.GeoDataFrame(
213			index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
214			)
215			for conn_point, wt_location in wea.groupby("connection point"):
216			current_wfs.at[conn_point, "geometry"] = MultiPoint(
217			wt_location["geometry"].values
218			).convex_hull
219			current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
220
221			current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
222			current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
223			current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
224			# The 'filter_wts' is used to discard atypical values for the current wind
225			# farms
226			current_wfs["filter2"] = current_wfs["geometry2"].apply(
227			lambda x: filter_current_wf(x)
228			)
229
230			# Apply the filter based on area and perimeter
231			current_wfs = current_wfs[current_wfs["filter2"]]
232			current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
233
234			wf_areas["area [km²]"] = wf_areas.area / 1000000
235
236			# Exclude areas smaller than X m². X was calculated as the area of
237			# 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
238			min_area = 4 * (0.126*2) np.sqrt(3)
239			wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
240
241			# Find the centroid of all the suitable potential areas
242			wf_areas["centroid"] = wf_areas.centroid
243
244			# find the potential areas that intersects the convex hulls of current
245			# wind farms and assign voltage levels
246			wf_areas = wf_areas.to_crs(4326)
247			for i in wf_areas.index:
248			intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
249			if intersection.any() == False:
250			wf_areas.at[i, "voltage"] = "No Intersection"
251			else:
252			wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
253
254			# wf_areas_ni has the potential areas which don't intersect any current
255			# wind farm
256			wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
257			wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
258			return wf_areas, wf_areas_ni
259
260
261			def wind_power_states(
262			state_wf,
263			state_wf_ni,
264			state_mv_districts,
265			target_power,
266			scenario_year,
267			source,
268			fed_state,
269			):
270			"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
271			database.
272
273			Parameters
274			----------
275			state_wf: geodataframe, mandatory
276			gdf containing all the wf in the state created based on existing wf.
277			state_wf_ni: geodataframe, mandatory
278			potential areas in the the state wich don't intersect any existing wf
279			state_mv_districts: geodataframe, mandatory
280			gdf containing all the MV/HV substations in the state
281			target_power: int, mandatory
282			Objective power for a state given in MW
283			scenario_year: str, mandatory
284			name of the scenario
285			source: str, mandatory
286			Type of energy genetor. Always "Wind_onshore" for this script.
287			fed_state: str, mandatory
288			Name of the state where the wind farms will be allocated
289
290			"""
291
292			def match_district_se(x):
293			for sub in hvmv_substation.index:
294			if x["geom"].contains(hvmv_substation.at[sub, "point"]):
295			return hvmv_substation.at[sub, "point"]
296
297			con = db.engine()
298			sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
299			# hvmv_substation has the information about HV transmission lines in
300			# Germany
301			hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
302
303			# Set wind potential depending on geographical location
304			power_north = 21.05 # MW/km²
305			power_south = 16.81 # MW/km²
306			# Set a maximum installed capacity to limit the power of big potential
307			# areas
308			max_power_hv = 120 # in MW
309			max_power_mv = 20 # in MW
310			# Max distance between WF (connected to MV) and nearest HV substation
311			# that allows its connection to HV.
312			max_dist_hv = 20000 # in meters
313
314			summary = pd.DataFrame(
315			columns=["state", "target", "from existin WF", "MV districts"]
316			)
317
318			north = [
319			"Schleswig-Holstein",
320			"Mecklenburg-Vorpommern",
321			"Niedersachsen",
322			"Bremen",
323			"Hamburg",
324			]
325
326			if fed_state == "DE":
327			sql = f"""SELECT * FROM boundaries.vg250_lan
328			WHERE gen in {tuple(north)}
329			"""
330			north_states = gpd.GeoDataFrame.from_postgis(
331			sql, con, geom_col="geometry"
332			)
333			north_states.to_crs(3035, inplace=True)
334			state_wf["nord"] = state_wf.within(north_states.unary_union)
335			state_wf["inst capacity [MW]"] = state_wf.apply(
336			lambda x: (
337			power_north * x["area [km²]"]
338			if x["nord"]
339			else power_south * x["area [km²]"]),
340			axis=1,
341			)
342			else:
343			if fed_state in north:
344			state_wf["inst capacity [MW]"] = (
345			power_north * state_wf["area [km²]"]
346			)
347			else:
348			state_wf["inst capacity [MW]"] = (
349			power_south * state_wf["area [km²]"]
350			)
351
352			# Divide selected areas based on voltage of connection points
353			wf_mv = state_wf[
354			(state_wf["voltage"] != "Hochspannung")
355			& (state_wf["voltage"] != "Hoechstspannung")
356			& (state_wf["voltage"] != "UmspannungZurHochspannung")
357			]
358
359			wf_hv = state_wf[
360			(state_wf["voltage"] == "Hochspannung")
361			\| (state_wf["voltage"] == "Hoechstspannung")
362			\| (state_wf["voltage"] == "UmspannungZurHochspannung")
363			]
364
365			# Wind farms connected to MV network will be connected to HV network if
366			# the distance to the closest HV substation is =< max_dist_hv, and the
367			# installed capacity is bigger than max_power_mv
368			hvmv_substation = hvmv_substation.to_crs(3035)
369			hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
370			lambda x: int(x.split(";")[0])
371			)
372			hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
373			hv_substations = hv_substations.unary_union # join all the hv_substations
374			wf_mv["dist_to_HV"] = (
375			state_wf["geom"].to_crs(3035).distance(hv_substations)
376			)
377			wf_mv_to_hv = wf_mv[
378			(wf_mv["dist_to_HV"] <= max_dist_hv)
379			& (wf_mv["inst capacity [MW]"] >= max_power_mv)
380			]
381			wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
382			wf_mv_to_hv["voltage"] = "Hochspannung"
383
384			wf_hv = pd.concat([wf_hv, wf_mv_to_hv])
385			wf_mv = wf_mv[
386			(wf_mv["dist_to_HV"] > max_dist_hv)
387			\| (wf_mv["inst capacity [MW]"] < max_power_mv)
388			]
389			wf_mv = wf_mv.drop(columns=["dist_to_HV"])
390
391			wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
392			lambda x: x if x < max_power_hv else max_power_hv
393			)
394
395			wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
396			lambda x: x if x < max_power_mv else max_power_mv
397			)
398
399			wind_farms = pd.concat([wf_hv, wf_mv])
400
401			# Adjust the total installed capacity to the scenario
402			total_wind_power = (
403			wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
404			)
405
406			if total_wind_power > target_power:
407			scale_factor = target_power / total_wind_power
408			wf_mv["inst capacity [MW]"] = (
409			wf_mv["inst capacity [MW]"] * scale_factor
410			)
411			wf_hv["inst capacity [MW]"] = (
412			wf_hv["inst capacity [MW]"] * scale_factor
413			)
414			wind_farms = pd.concat([wf_hv, wf_mv])
415			summary = pd.concat(
416			[
417			summary,
418			pd.DataFrame(
419			index=[summary.index.max() + 1],
420			data={
421			"state": fed_state,
422			"target": target_power,
423			"from existin WF": wind_farms[
424			"inst capacity [MW]"
425			].sum(),
426			"MV districts": 0,
427			},
428			),
429			],
430			ignore_index=True,
431			)
432			else:
433			extra_wf = state_mv_districts.copy()
434			extra_wf = extra_wf.set_geometry("geom")
435			extra_wf["area [km²]"] = 0.0
436			for district in extra_wf.index:
437			try:
438			pot_area_district = gpd.clip(
439			state_wf_ni, extra_wf.at[district, "geom"]
440			)
441			extra_wf.at[district, "area [km²]"] = pot_area_district[
442			"area [km²]"
443			].sum()
444			except:
445			print(district)
446			extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
447			total_new_area = extra_wf["area [km²]"].sum()
448			scale_factor = (target_power - total_wind_power) / total_new_area
449			extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
450			extra_wf["voltage"] = "Hochspannung"
451			summary = pd.concat(
452			[
453			summary,
454			pd.DataFrame(
455			index=[summary.index.max() + 1],
456			data={
457			"state": fed_state,
458			"target": target_power,
459			"from existin WF": wind_farms[
460			"inst capacity [MW]"
461			].sum(),
462			"MV districts": extra_wf["inst capacity [MW]"].sum(),
463			},
464			),
465			],
466			ignore_index=True,
467			)
468			extra_wf.to_crs(4326, inplace=True)
469			wind_farms = pd.concat([wind_farms, extra_wf], ignore_index=True)
470
471			# Use Definition of thresholds for voltage level assignment
472			wind_farms["voltage_level"] = 0
473			for i in wind_farms.index:
474			try:
475			if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
476			wind_farms.at[i, "voltage_level"] = 5
477			continue
478			if wind_farms.at[i, "inst capacity [MW]"] < 20:
479			wind_farms.at[i, "voltage_level"] = 4
480			continue
481			if wind_farms.at[i, "inst capacity [MW]"] >= 20:
482			wind_farms.at[i, "voltage_level"] = 3
483			continue
484			except:
485			print(i)
486
487			# Look for the maximum id in the table egon_power_plants
488			sql = "SELECT MAX(id) FROM supply.egon_power_plants"
489			max_id = pd.read_sql(sql, con)
490			max_id = max_id["max"].iat[0]
491			if max_id is None:
492			wind_farm_id = 1
493			else:
494			wind_farm_id = int(max_id + 1)
495
496			# write_table in egon-data database:
497
498			# Copy relevant columns from wind_farms
499			insert_wind_farms = wind_farms[
500			["inst capacity [MW]", "voltage_level", "centroid"]
501			]
502
503			# Set static column values
504			insert_wind_farms["carrier"] = source
505			insert_wind_farms["scenario"] = scenario_year
506
507			# Change name and crs of geometry column
508			insert_wind_farms = (
509			insert_wind_farms.rename(
510			{"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
511			)
512			.set_geometry("geom")
513			.to_crs(4326)
514			)
515
516			# Reset index
517			insert_wind_farms.index = pd.RangeIndex(
518			start=wind_farm_id,
519			stop=wind_farm_id + len(insert_wind_farms),
520			name="id",
521			)
522
523			# Delete old wind_onshore generators
524			db.execute_sql(
525			f"""DELETE FROM supply.egon_power_plants
526			WHERE carrier = 'wind_onshore'
527			AND scenario = '{scenario_year}'
528			"""
529			)
530
531			# Insert into database
532			insert_wind_farms.reset_index().to_postgis(
533			"egon_power_plants",
534			schema="supply",
535			con=db.engine(),
536			if_exists="append",
537			)
538			return wind_farms, summary
539
540
541			def generate_map():
542			"""Generates a map with the position of all the wind farms
543
544			Parameters
545			----------
546			*No parameters required
547
548			"""
549			con = db.engine()
550
551			# Import wind farms from egon-data
552			sql = (
553			"SELECT carrier, el_capacity, geom, scenario FROM "
554			"supply.egon_power_plants WHERE carrier = 'wind_onshore'"
555			)
556			wind_farms_t = gpd.GeoDataFrame.from_postgis(
557			sql, con, geom_col="geom", crs=4326
558			)
559			wind_farms_t = wind_farms_t.to_crs(3035)
560
561			for scenario in wind_farms_t.scenario.unique():
562			wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
563			# mv_districts has geographic info of medium voltage districts in
564			# Germany
565			sql = "SELECT geom FROM grid.egon_mv_grid_district"
566			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
567			mv_districts = mv_districts.to_crs(3035)
568
569			mv_districts["power"] = 0.0
570			for std in mv_districts.index:
571			try:
572			mv_districts.at[std, "power"] = gpd.clip(
573			wind_farms, mv_districts.at[std, "geom"]
574			).el_capacity.sum()
575			except:
576			print(std)
577
578			fig, ax = plt.subplots(1, 1)
579			mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
580			mv_districts.plot(
581			ax=ax,
582			column="power",
583			cmap="magma_r",
584			legend=True,
585			legend_kwds={
586			"label": "Installed capacity in MW",
587			"orientation": "vertical",
588			},
589			)
590			plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
591			return 0
592

openego / eGon-data

Pull Request — dev (#1197)

data.datasets.power_plants.wind_farms B

Complexity

Size/Duplication

Importance

4 Functions

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like