data.datasets.power_plants.wind_farms.generate_wind_farms() - Code Metrics - Inspection of "Update MaStR data and adapt PV+wind methods" - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1063)

unknown

created 2022-12-22 23:57 UTC

generate_wind_farms() D

↳ Parent: data.datasets.power_plants.wind_farms

Complexity

Conditions

Size

Total Lines	134
Code Lines	72

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	72
dl	0
loc	134
rs	4.2763
c	0
b	0
f	0
cc	12
nop	0

How to fix Long Method Complexity

from matplotlib import pyplot as plt
from shapely.geometry import MultiPoint, Point
import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import db
from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
import egon.data.config


def insert():
    """Main function. Import power objectives generate results calling the
    functions "generate_wind_farms" and  "wind_power_states".

    Parameters
    ----------
    *No parameters required

    """

    con = db.engine()

    # federal_std has the shapes of the German states
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
    federal_std = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geometry", crs=4326
    )

    # target_power_df has the expected capacity of each federal state
    sql = (
        "SELECT  carrier, capacity, nuts, scenario_name FROM "
        "supply.egon_scenario_capacities"
    )
    target_power_df = pd.read_sql(sql, con)

    # mv_districts has geographic info of medium voltage districts in Germany
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)

    # Delete all the water bodies from the federal states shapes
    federal_std = federal_std[federal_std["gf"] == 4]
    federal_std.drop(columns=["gf"], inplace=True)
    # Filter the potential expected from wind_onshore
    target_power_df = target_power_df[
        target_power_df["carrier"] == "wind_onshore"
    ]
    target_power_df.set_index("nuts", inplace=True)
    target_power_df["geom"] = Point(0, 0)

    # Join the geometries which belong to the same states
    for std in target_power_df.index:
        df = federal_std[federal_std["nuts"] == std]
        if df.size > 0:
            target_power_df.at[std, "name"] = df["gen"].iat[0]
        else:
            target_power_df.at[std, "name"] = np.nan
        target_power_df.at[std, "geom"] = df.unary_union
    target_power_df = gpd.GeoDataFrame(
        target_power_df, geometry="geom", crs=4326
    )
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
    target_power_df = target_power_df.to_crs(3035)

    # Create the shape for full Germany
    target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
    target_power_df.at["DE", "name"] = "Germany"
    # Generate WFs for Germany based on potential areas and existing WFs
    wf_areas, wf_areas_ni = generate_wind_farms()

    # Change the columns "geometry" of this GeoDataFrames
    wf_areas.set_geometry("centroid", inplace=True)
    wf_areas_ni.set_geometry("centroid", inplace=True)

    # Create centroids of mv_districts to apply the clip function
    mv_districts["centroid"] = mv_districts.centroid
    mv_districts.set_geometry("centroid", inplace=True)

    summary_t = pd.DataFrame()
    farms = pd.DataFrame()

    # Fit wind farms scenarions for each one of the states
    for scenario in target_power_df.index:
        state_wf = gpd.clip(wf_areas, target_power_df.at[scenario, "geom"])
        state_wf_ni = gpd.clip(
            wf_areas_ni, target_power_df.at[scenario, "geom"]
        )
        state_mv_districts = gpd.clip(
            mv_districts, target_power_df.at[scenario, "geom"]
        )
        target_power = target_power_df.at[scenario, "capacity"]
        scenario_year = target_power_df.at[scenario, "scenario_name"]
        source = target_power_df.at[scenario, "carrier"]
        fed_state = target_power_df.at[scenario, "name"]
        wind_farms_state, summary_state = wind_power_states(
            state_wf,
            state_wf_ni,
            state_mv_districts,
            target_power,
            scenario_year,
            source,
            fed_state,
        )
        summary_t = summary_t.append(summary_state)
        farms = farms.append(wind_farms_state)

    generate_map()

    return (summary_t, farms)


def generate_wind_farms():
    """Generate wind farms based on existing wind farms.

    Parameters
    ----------
    *No parameters required

    """
    # get config
    cfg = egon.data.config.datasets()["power_plants"]

    # Due to typos in some inputs, some areas of existing wind farms
    # should be discarded using perimeter and area filters
    def filter_current_wf(wf_geometry):
        if wf_geometry.geom_type == "Point":
            return True
        if wf_geometry.geom_type == "Polygon":
            area = wf_geometry.area
            length = wf_geometry.length
            # Filter based on the biggest (# of WT) wind farm
            return (area < 40000000) & (length < 40000)
        if wf_geometry.geom_type == "LineString":
            length = wf_geometry.length
            return length < 1008  # 8 * rotor diameter (8*126m)

    # The function 'wind_farm' returns the connection point of a wind turbine
    def wind_farm(x):
        try:
            return map_ap_wea_farm[x]
        except:
            return np.nan

    # The function 'voltage' returns the voltage level a wind turbine operates
    def voltage(x):
        try:
            return map_ap_wea_voltage[x]
        except:
            return np.nan

    # Connect to the data base
    con = db.engine()
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
    # wf_areas has all the potential areas geometries for wind farms
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
    # bus has the connection points of the wind farms
    bus = pd.read_csv(
        WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
        index_col="MaStRNummer",
    )
    # Drop all the rows without connection point
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
    # wea has info of each wind turbine in Germany.
    wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])

    # Delete all the rows without information about geographical location
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
    # Delete all the offshore wind turbines
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
    # the variable map_ap_wea_farm have the connection point of all the
    # available wt in the dataframe bus.
    map_ap_wea_farm = {}
    map_ap_wea_voltage = {}

    wea["connection point"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"]
    )
    wea["voltage"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"]
    )

    # Create the columns 'geometry' which will have location of each WT in a
    # point type
    wea = gpd.GeoDataFrame(
        wea,
        geometry=gpd.points_from_xy(
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
        ),
    )

    # wf_size storages the number of WT connected to each connection point
    wf_size = wea["connection point"].value_counts()
    # Delete all the connection points with less than 3 WT
    wf_size = wf_size[wf_size >= 3]
    # Filter all the WT which are not part of a wind farm of at least 3 WT
    wea = wea[wea["connection point"].isin(wf_size.index)]
    # current_wfs has all the geometries that represent the existing wind
    # farms
    current_wfs = gpd.GeoDataFrame(
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
    )
    for conn_point, wt_location in wea.groupby("connection point"):
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
            wt_location["geometry"].values
        ).convex_hull
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]

    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
    # The 'filter_wts' is used to discard atypical values for the current wind
    # farms
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
        lambda x: filter_current_wf(x)
    )

    # Apply the filter based on area and perimeter
    current_wfs = current_wfs[current_wfs["filter2"]]
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])

    wf_areas["area [km²]"] = wf_areas.area / 1000000

    # Exclude areas smaller than X m². X was calculated as the area of
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
    min_area = 4 * (0.126**2) * np.sqrt(3)
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]

    # Find the centroid of all the suitable potential areas
    wf_areas["centroid"] = wf_areas.centroid

    # find the potential areas that intersects the convex hulls of current
    # wind farms and assign voltage levels
    wf_areas = wf_areas.to_crs(4326)
    for i in wf_areas.index:
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
        if intersection.any() == False:
            wf_areas.at[i, "voltage"] = "No Intersection"
        else:
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]

    # wf_areas_ni has the potential areas which don't intersect any current
    # wind farm
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
    return wf_areas, wf_areas_ni


def wind_power_states(
    state_wf,
    state_wf_ni,
    state_mv_districts,
    target_power,
    scenario_year,
    source,
    fed_state,
):
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
    database.

    Parameters
    ----------
    state_wf: geodataframe, mandatory
        gdf containing all the wf in the state created based on existing wf.
    state_wf_ni: geodataframe, mandatory
        potential areas in the the state wich don't intersect any existing wf
    state_mv_districts: geodataframe, mandatory
        gdf containing all the MV/HV substations in the state
    target_power: int, mandatory
        Objective power for a state given in MW
    scenario_year: str, mandatory
        name of the scenario
    source: str, mandatory
        Type of energy genetor. Always "Wind_onshore" for this script.
    fed_state: str, mandatory
        Name of the state where the wind farms will be allocated

    """

    def match_district_se(x):
        for sub in hvmv_substation.index:
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
                return hvmv_substation.at[sub, "point"]

    con = db.engine()
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
    # hvmv_substation has the information about HV transmission lines in
    # Germany
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")

    # Set wind potential depending on geographical location
    power_north = 21.05  # MW/km²
    power_south = 16.81  # MW/km²
    # Set a maximum installed capacity to limit the power of big potential
    # areas
    max_power_hv = 120  # in MW
    max_power_mv = 20  # in MW
    # Max distance between WF (connected to MV) and nearest HV substation
    # that allows its connection to HV.
    max_dist_hv = 20000  # in meters

    summary = pd.DataFrame(
        columns=["state", "target", "from existin WF", "MV districts"]
    )

    north = [
        "Schleswig-Holstein",
        "Mecklenburg-Vorpommern",
        "Niedersachsen",
        "Bremen",
        "Hamburg",
    ]

    if fed_state in north:
        state_wf["inst capacity [MW]"] = power_north * state_wf["area [km²]"]
    else:
        state_wf["inst capacity [MW]"] = power_south * state_wf["area [km²]"]

    # Divide selected areas based on voltage of connection points
    wf_mv = state_wf[
        (state_wf["voltage"] != "Hochspannung")
        & (state_wf["voltage"] != "Hoechstspannung")
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
    ]

    wf_hv = state_wf[
        (state_wf["voltage"] == "Hochspannung")
        | (state_wf["voltage"] == "Hoechstspannung")
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
    ]

    # Wind farms connected to MV network will be connected to HV network if
    # the distance to the closest HV substation is =< max_dist_hv, and the
    # installed capacity is bigger than max_power_mv
    hvmv_substation = hvmv_substation.to_crs(3035)
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
        lambda x: int(x.split(";")[0])
    )
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
    hv_substations = hv_substations.unary_union  # join all the hv_substations
    wf_mv["dist_to_HV"] = (
        state_wf["geom"].to_crs(3035).distance(hv_substations)
    )
    wf_mv_to_hv = wf_mv[
        (wf_mv["dist_to_HV"] <= max_dist_hv)
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
    ]
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
    wf_mv_to_hv["voltage"] = "Hochspannung"

    wf_hv = wf_hv.append(wf_mv_to_hv)
    wf_mv = wf_mv[
        (wf_mv["dist_to_HV"] > max_dist_hv)
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
    ]
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])

    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_hv else max_power_hv
    )

    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_mv else max_power_mv
    )

    wind_farms = wf_hv.append(wf_mv)

    # Adjust the total installed capacity to the scenario
    total_wind_power = (
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
    )
    if total_wind_power > target_power:
        scale_factor = target_power / total_wind_power
        wf_mv["inst capacity [MW]"] = (
            wf_mv["inst capacity [MW]"] * scale_factor
        )
        wf_hv["inst capacity [MW]"] = (
            wf_hv["inst capacity [MW]"] * scale_factor
        )
        wind_farms = wf_hv.append(wf_mv)
        summary = summary.append(
            {
                "state": fed_state,
                "target": target_power,
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
                "MV districts": 0,
            },
            ignore_index=True,
        )
    else:
        extra_wf = state_mv_districts.copy()
        extra_wf = extra_wf.drop(columns=["centroid"])
        # the column centroid has the coordinates of the substation
        # corresponding to each mv_grid_district
        extra_wf["centroid"] = extra_wf.apply(match_district_se, axis=1)
        extra_wf = extra_wf.set_geometry("centroid")
        extra_wf["area [km²]"] = 0.0
        for district in extra_wf.index:
            try:
                pot_area_district = gpd.clip(
                    state_wf_ni, extra_wf.at[district, "geom"]
                )
                extra_wf.at[district, "area [km²]"] = pot_area_district[
                    "area [km²]"
                ].sum()
            except:
                print(district)
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
        total_new_area = extra_wf["area [km²]"].sum()
        scale_factor = (target_power - total_wind_power) / total_new_area
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
        extra_wf["voltage"] = "Hochspannung"
        summary = summary.append(
            {
                "state": fed_state,
                "target": target_power,
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
                "MV districts": extra_wf["inst capacity [MW]"].sum(),
            },
            ignore_index=True,
        )
        wind_farms = wind_farms.append(extra_wf, ignore_index=True)

    # Use Definition of thresholds for voltage level assignment
    wind_farms["voltage_level"] = 0
    for i in wind_farms.index:
        try:
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
                wind_farms.at[i, "voltage_level"] = 5
                continue
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
                wind_farms.at[i, "voltage_level"] = 4
                continue
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
                wind_farms.at[i, "voltage_level"] = 3
                continue
        except:
            print(i)

    # Look for the maximum id in the table egon_power_plants
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
    max_id = pd.read_sql(sql, con)
    max_id = max_id["max"].iat[0]
    if max_id is None:
        wind_farm_id = 1
    else:
        wind_farm_id = int(max_id + 1)

    # write_table in egon-data database:

    # Copy relevant columns from wind_farms
    insert_wind_farms = wind_farms[
        ["inst capacity [MW]", "voltage_level", "centroid"]
    ]

    # Set static column values
    insert_wind_farms["carrier"] = source
    insert_wind_farms["scenario"] = scenario_year

    # Change name and crs of geometry column
    insert_wind_farms = (
        insert_wind_farms.rename(
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
        )
        .set_geometry("geom")
        .to_crs(4326)
    )

    # Reset index
    insert_wind_farms.index = pd.RangeIndex(
        start=wind_farm_id,
        stop=wind_farm_id + len(insert_wind_farms),
        name="id",
    )

    # Insert into database
    insert_wind_farms.reset_index().to_postgis(
        "egon_power_plants",
        schema="supply",
        con=db.engine(),
        if_exists="append",
    )
    return wind_farms, summary


def generate_map():
    """Generates a map with the position of all the wind farms

    Parameters
    ----------
    *No parameters required

    """
    con = db.engine()

    # Import wind farms from egon-data
    sql = (
        "SELECT  carrier, el_capacity, geom, scenario FROM "
        "supply.egon_power_plants WHERE carrier = 'wind_onshore'"
    )
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geom", crs=4326
    )
    wind_farms_t = wind_farms_t.to_crs(3035)

    for scenario in wind_farms_t.scenario.unique():
        wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
        # mv_districts has geographic info of medium voltage districts in
        # Germany
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
        mv_districts = mv_districts.to_crs(3035)

        mv_districts["power"] = 0.0
        for std in mv_districts.index:
            try:
                mv_districts.at[std, "power"] = gpd.clip(
                    wind_farms, mv_districts.at[std, "geom"]
                ).el_capacity.sum()
            except:
                print(std)

        fig, ax = plt.subplots(1, 1)
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
        mv_districts.plot(
            ax=ax,
            column="power",
            cmap="magma_r",
            legend=True,
            legend_kwds={
                "label": "Installed capacity in MW",
                "orientation": "vertical",
            },
        )
        plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
    return 0


1			from matplotlib import pyplot as plt
2			from shapely.geometry import MultiPoint, Point
3			import geopandas as gpd
4			import numpy as np
5			import pandas as pd
6
7			from egon.data import db
8			from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW
9			import egon.data.config
10
11
12			def insert():
13			"""Main function. Import power objectives generate results calling the
14			functions "generate_wind_farms" and "wind_power_states".
15
16			Parameters
17			----------
18			*No parameters required
19
20			"""
21
22			con = db.engine()
23
24			# federal_std has the shapes of the German states
25			sql = "SELECT gen, gf, nuts, geometry FROM boundaries.vg250_lan"
26			federal_std = gpd.GeoDataFrame.from_postgis(
27			sql, con, geom_col="geometry", crs=4326
28			)
29
30			# target_power_df has the expected capacity of each federal state
31			sql = (
32			"SELECT carrier, capacity, nuts, scenario_name FROM "
33			"supply.egon_scenario_capacities"
34			)
35			target_power_df = pd.read_sql(sql, con)
36
37			# mv_districts has geographic info of medium voltage districts in Germany
38			sql = "SELECT geom FROM grid.egon_mv_grid_district"
39			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
40
41			# Delete all the water bodies from the federal states shapes
42			federal_std = federal_std[federal_std["gf"] == 4]
43			federal_std.drop(columns=["gf"], inplace=True)
44			# Filter the potential expected from wind_onshore
45			target_power_df = target_power_df[
46			target_power_df["carrier"] == "wind_onshore"
47			]
48			target_power_df.set_index("nuts", inplace=True)
49			target_power_df["geom"] = Point(0, 0)
50
51			# Join the geometries which belong to the same states
52			for std in target_power_df.index:
53			df = federal_std[federal_std["nuts"] == std]
54			if df.size > 0:
55			target_power_df.at[std, "name"] = df["gen"].iat[0]
56			else:
57			target_power_df.at[std, "name"] = np.nan
58			target_power_df.at[std, "geom"] = df.unary_union
59			target_power_df = gpd.GeoDataFrame(
60			target_power_df, geometry="geom", crs=4326
61			)
62			target_power_df = target_power_df[target_power_df["capacity"] > 0]
63			target_power_df = target_power_df.to_crs(3035)
64
65			# Create the shape for full Germany
66			target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union
67			target_power_df.at["DE", "name"] = "Germany"
68			# Generate WFs for Germany based on potential areas and existing WFs
69			wf_areas, wf_areas_ni = generate_wind_farms()
70
71			# Change the columns "geometry" of this GeoDataFrames
72			wf_areas.set_geometry("centroid", inplace=True)
73			wf_areas_ni.set_geometry("centroid", inplace=True)
74
75			# Create centroids of mv_districts to apply the clip function
76			mv_districts["centroid"] = mv_districts.centroid
77			mv_districts.set_geometry("centroid", inplace=True)
78
79			summary_t = pd.DataFrame()
80			farms = pd.DataFrame()
81
82			# Fit wind farms scenarions for each one of the states
83			for scenario in target_power_df.index:
84			state_wf = gpd.clip(wf_areas, target_power_df.at[scenario, "geom"])
85			state_wf_ni = gpd.clip(
86			wf_areas_ni, target_power_df.at[scenario, "geom"]
87			)
88			state_mv_districts = gpd.clip(
89			mv_districts, target_power_df.at[scenario, "geom"]
90			)
91			target_power = target_power_df.at[scenario, "capacity"]
92			scenario_year = target_power_df.at[scenario, "scenario_name"]
93			source = target_power_df.at[scenario, "carrier"]
94			fed_state = target_power_df.at[scenario, "name"]
95			wind_farms_state, summary_state = wind_power_states(
96			state_wf,
97			state_wf_ni,
98			state_mv_districts,
99			target_power,
100			scenario_year,
101			source,
102			fed_state,
103			)
104			summary_t = summary_t.append(summary_state)
105			farms = farms.append(wind_farms_state)
106
107			generate_map()
108
109			return (summary_t, farms)
110
111
112			def generate_wind_farms():
113			"""Generate wind farms based on existing wind farms.
114
115			Parameters
116			----------
117			*No parameters required
118
119			"""
120			# get config
121			cfg = egon.data.config.datasets()["power_plants"]
122
123			# Due to typos in some inputs, some areas of existing wind farms
124			# should be discarded using perimeter and area filters
125			def filter_current_wf(wf_geometry):
126			if wf_geometry.geom_type == "Point":
127			return True
128			if wf_geometry.geom_type == "Polygon":
129			area = wf_geometry.area
130			length = wf_geometry.length
131			# Filter based on the biggest (# of WT) wind farm
132			return (area < 40000000) & (length < 40000)
133			if wf_geometry.geom_type == "LineString":
134			length = wf_geometry.length
135			return length < 1008 # 8 * rotor diameter (8*126m)
136
137			# The function 'wind_farm' returns the connection point of a wind turbine
138			def wind_farm(x):
139			try:
140			return map_ap_wea_farm[x]
141			except:
142			return np.nan
143
144			# The function 'voltage' returns the voltage level a wind turbine operates
145			def voltage(x):
146			try:
147			return map_ap_wea_voltage[x]
148			except:
149			return np.nan
150
151			# Connect to the data base
152			con = db.engine()
153			sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
154			# wf_areas has all the potential areas geometries for wind farms
155			wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
156			# bus has the connection points of the wind farms
157			bus = pd.read_csv(
158			WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"],
159			index_col="MaStRNummer",
160			)
161			# Drop all the rows without connection point
162			bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
163			# wea has info of each wind turbine in Germany.
164			wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"])
165
166			# Delete all the rows without information about geographical location
167			wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
168			# Delete all the offshore wind turbines
169			wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
170			# the variable map_ap_wea_farm have the connection point of all the
171			# available wt in the dataframe bus.
172			map_ap_wea_farm = {}
173			map_ap_wea_voltage = {}
174
175			wea["connection point"] = wea["LokationMastrNummer"].map(
176			bus["NetzanschlusspunktMastrNummer"]
177			)
178			wea["voltage"] = wea["LokationMastrNummer"].map(
179			bus["NetzanschlusspunktMastrNummer"]
180			)
181
182			# Create the columns 'geometry' which will have location of each WT in a
183			# point type
184			wea = gpd.GeoDataFrame(
185			wea,
186			geometry=gpd.points_from_xy(
187			wea["Laengengrad"], wea["Breitengrad"], crs=4326
188			),
189			)
190
191			# wf_size storages the number of WT connected to each connection point
192			wf_size = wea["connection point"].value_counts()
193			# Delete all the connection points with less than 3 WT
194			wf_size = wf_size[wf_size >= 3]
195			# Filter all the WT which are not part of a wind farm of at least 3 WT
196			wea = wea[wea["connection point"].isin(wf_size.index)]
197			# current_wfs has all the geometries that represent the existing wind
198			# farms
199			current_wfs = gpd.GeoDataFrame(
200			index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
201			)
202			for conn_point, wt_location in wea.groupby("connection point"):
203			current_wfs.at[conn_point, "geometry"] = MultiPoint(
204			wt_location["geometry"].values
205			).convex_hull
206			current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
207
208			current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
209			current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
210			current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
211			# The 'filter_wts' is used to discard atypical values for the current wind
212			# farms
213			current_wfs["filter2"] = current_wfs["geometry2"].apply(
214			lambda x: filter_current_wf(x)
215			)
216
217			# Apply the filter based on area and perimeter
218			current_wfs = current_wfs[current_wfs["filter2"]]
219			current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
220
221			wf_areas["area [km²]"] = wf_areas.area / 1000000
222
223			# Exclude areas smaller than X m². X was calculated as the area of
224			# 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
225			min_area = 4 * (0.126*2) np.sqrt(3)
226			wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
227
228			# Find the centroid of all the suitable potential areas
229			wf_areas["centroid"] = wf_areas.centroid
230
231			# find the potential areas that intersects the convex hulls of current
232			# wind farms and assign voltage levels
233			wf_areas = wf_areas.to_crs(4326)
234			for i in wf_areas.index:
235			intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
236			if intersection.any() == False:
237			wf_areas.at[i, "voltage"] = "No Intersection"
238			else:
239			wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
240
241			# wf_areas_ni has the potential areas which don't intersect any current
242			# wind farm
243			wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
244			wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
245			return wf_areas, wf_areas_ni
246
247
248			def wind_power_states(
249			state_wf,
250			state_wf_ni,
251			state_mv_districts,
252			target_power,
253			scenario_year,
254			source,
255			fed_state,
256			):
257			"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL
258			database.
259
260			Parameters
261			----------
262			state_wf: geodataframe, mandatory
263			gdf containing all the wf in the state created based on existing wf.
264			state_wf_ni: geodataframe, mandatory
265			potential areas in the the state wich don't intersect any existing wf
266			state_mv_districts: geodataframe, mandatory
267			gdf containing all the MV/HV substations in the state
268			target_power: int, mandatory
269			Objective power for a state given in MW
270			scenario_year: str, mandatory
271			name of the scenario
272			source: str, mandatory
273			Type of energy genetor. Always "Wind_onshore" for this script.
274			fed_state: str, mandatory
275			Name of the state where the wind farms will be allocated
276
277			"""
278
279			def match_district_se(x):
280			for sub in hvmv_substation.index:
281			if x["geom"].contains(hvmv_substation.at[sub, "point"]):
282			return hvmv_substation.at[sub, "point"]
283
284			con = db.engine()
285			sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
286			# hvmv_substation has the information about HV transmission lines in
287			# Germany
288			hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
289
290			# Set wind potential depending on geographical location
291			power_north = 21.05 # MW/km²
292			power_south = 16.81 # MW/km²
293			# Set a maximum installed capacity to limit the power of big potential
294			# areas
295			max_power_hv = 120 # in MW
296			max_power_mv = 20 # in MW
297			# Max distance between WF (connected to MV) and nearest HV substation
298			# that allows its connection to HV.
299			max_dist_hv = 20000 # in meters
300
301			summary = pd.DataFrame(
302			columns=["state", "target", "from existin WF", "MV districts"]
303			)
304
305			north = [
306			"Schleswig-Holstein",
307			"Mecklenburg-Vorpommern",
308			"Niedersachsen",
309			"Bremen",
310			"Hamburg",
311			]
312
313			if fed_state in north:
314			state_wf["inst capacity [MW]"] = power_north * state_wf["area [km²]"]
315			else:
316			state_wf["inst capacity [MW]"] = power_south * state_wf["area [km²]"]
317
318			# Divide selected areas based on voltage of connection points
319			wf_mv = state_wf[
320			(state_wf["voltage"] != "Hochspannung")
321			& (state_wf["voltage"] != "Hoechstspannung")
322			& (state_wf["voltage"] != "UmspannungZurHochspannung")
323			]
324
325			wf_hv = state_wf[
326			(state_wf["voltage"] == "Hochspannung")
327			\| (state_wf["voltage"] == "Hoechstspannung")
328			\| (state_wf["voltage"] == "UmspannungZurHochspannung")
329			]
330
331			# Wind farms connected to MV network will be connected to HV network if
332			# the distance to the closest HV substation is =< max_dist_hv, and the
333			# installed capacity is bigger than max_power_mv
334			hvmv_substation = hvmv_substation.to_crs(3035)
335			hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
336			lambda x: int(x.split(";")[0])
337			)
338			hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
339			hv_substations = hv_substations.unary_union # join all the hv_substations
340			wf_mv["dist_to_HV"] = (
341			state_wf["geom"].to_crs(3035).distance(hv_substations)
342			)
343			wf_mv_to_hv = wf_mv[
344			(wf_mv["dist_to_HV"] <= max_dist_hv)
345			& (wf_mv["inst capacity [MW]"] >= max_power_mv)
346			]
347			wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
348			wf_mv_to_hv["voltage"] = "Hochspannung"
349
350			wf_hv = wf_hv.append(wf_mv_to_hv)
351			wf_mv = wf_mv[
352			(wf_mv["dist_to_HV"] > max_dist_hv)
353			\| (wf_mv["inst capacity [MW]"] < max_power_mv)
354			]
355			wf_mv = wf_mv.drop(columns=["dist_to_HV"])
356
357			wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
358			lambda x: x if x < max_power_hv else max_power_hv
359			)
360
361			wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
362			lambda x: x if x < max_power_mv else max_power_mv
363			)
364
365			wind_farms = wf_hv.append(wf_mv)
366
367			# Adjust the total installed capacity to the scenario
368			total_wind_power = (
369			wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
370			)
371			if total_wind_power > target_power:
372			scale_factor = target_power / total_wind_power
373			wf_mv["inst capacity [MW]"] = (
374			wf_mv["inst capacity [MW]"] * scale_factor
375			)
376			wf_hv["inst capacity [MW]"] = (
377			wf_hv["inst capacity [MW]"] * scale_factor
378			)
379			wind_farms = wf_hv.append(wf_mv)
380			summary = summary.append(
381			{
382			"state": fed_state,
383			"target": target_power,
384			"from existin WF": wind_farms["inst capacity [MW]"].sum(),
385			"MV districts": 0,
386			},
387			ignore_index=True,
388			)
389			else:
390			extra_wf = state_mv_districts.copy()
391			extra_wf = extra_wf.drop(columns=["centroid"])
392			# the column centroid has the coordinates of the substation
393			# corresponding to each mv_grid_district
394			extra_wf["centroid"] = extra_wf.apply(match_district_se, axis=1)
395			extra_wf = extra_wf.set_geometry("centroid")
396			extra_wf["area [km²]"] = 0.0
397			for district in extra_wf.index:
398			try:
399			pot_area_district = gpd.clip(
400			state_wf_ni, extra_wf.at[district, "geom"]
401			)
402			extra_wf.at[district, "area [km²]"] = pot_area_district[
403			"area [km²]"
404			].sum()
405			except:
406			print(district)
407			extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
408			total_new_area = extra_wf["area [km²]"].sum()
409			scale_factor = (target_power - total_wind_power) / total_new_area
410			extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
411			extra_wf["voltage"] = "Hochspannung"
412			summary = summary.append(
413			{
414			"state": fed_state,
415			"target": target_power,
416			"from existin WF": wind_farms["inst capacity [MW]"].sum(),
417			"MV districts": extra_wf["inst capacity [MW]"].sum(),
418			},
419			ignore_index=True,
420			)
421			wind_farms = wind_farms.append(extra_wf, ignore_index=True)
422
423			# Use Definition of thresholds for voltage level assignment
424			wind_farms["voltage_level"] = 0
425			for i in wind_farms.index:
426			try:
427			if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
428			wind_farms.at[i, "voltage_level"] = 5
429			continue
430			if wind_farms.at[i, "inst capacity [MW]"] < 20:
431			wind_farms.at[i, "voltage_level"] = 4
432			continue
433			if wind_farms.at[i, "inst capacity [MW]"] >= 20:
434			wind_farms.at[i, "voltage_level"] = 3
435			continue
436			except:
437			print(i)
438
439			# Look for the maximum id in the table egon_power_plants
440			sql = "SELECT MAX(id) FROM supply.egon_power_plants"
441			max_id = pd.read_sql(sql, con)
442			max_id = max_id["max"].iat[0]
443			if max_id is None:
444			wind_farm_id = 1
445			else:
446			wind_farm_id = int(max_id + 1)
447
448			# write_table in egon-data database:
449
450			# Copy relevant columns from wind_farms
451			insert_wind_farms = wind_farms[
452			["inst capacity [MW]", "voltage_level", "centroid"]
453			]
454
455			# Set static column values
456			insert_wind_farms["carrier"] = source
457			insert_wind_farms["scenario"] = scenario_year
458
459			# Change name and crs of geometry column
460			insert_wind_farms = (
461			insert_wind_farms.rename(
462			{"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
463			)
464			.set_geometry("geom")
465			.to_crs(4326)
466			)
467
468			# Reset index
469			insert_wind_farms.index = pd.RangeIndex(
470			start=wind_farm_id,
471			stop=wind_farm_id + len(insert_wind_farms),
472			name="id",
473			)
474
475			# Insert into database
476			insert_wind_farms.reset_index().to_postgis(
477			"egon_power_plants",
478			schema="supply",
479			con=db.engine(),
480			if_exists="append",
481			)
482			return wind_farms, summary
483
484
485			def generate_map():
486			"""Generates a map with the position of all the wind farms
487
488			Parameters
489			----------
490			*No parameters required
491
492			"""
493			con = db.engine()
494
495			# Import wind farms from egon-data
496			sql = (
497			"SELECT carrier, el_capacity, geom, scenario FROM "
498			"supply.egon_power_plants WHERE carrier = 'wind_onshore'"
499			)
500			wind_farms_t = gpd.GeoDataFrame.from_postgis(
501			sql, con, geom_col="geom", crs=4326
502			)
503			wind_farms_t = wind_farms_t.to_crs(3035)
504
505			for scenario in wind_farms_t.scenario.unique():
506			wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario]
507			# mv_districts has geographic info of medium voltage districts in
508			# Germany
509			sql = "SELECT geom FROM grid.egon_mv_grid_district"
510			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
511			mv_districts = mv_districts.to_crs(3035)
512
513			mv_districts["power"] = 0.0
514			for std in mv_districts.index:
515			try:
516			mv_districts.at[std, "power"] = gpd.clip(
517			wind_farms, mv_districts.at[std, "geom"]
518			).el_capacity.sum()
519			except:
520			print(std)
521
522			fig, ax = plt.subplots(1, 1)
523			mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
524			mv_districts.plot(
525			ax=ax,
526			column="power",
527			cmap="magma_r",
528			legend=True,
529			legend_kwds={
530			"label": "Installed capacity in MW",
531			"orientation": "vertical",
532			},
533			)
534			plt.savefig(f"wind_farms_{scenario}.png", dpi=300)
535			return 0
536

openego / eGon-data

Pull Request — dev (#1063)

generate_wind_farms() D

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like