data.datasets.power_plants.wind_farms - Code Metrics - Inspection of "Features/#519 update mastr data" - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1053)

unknown

created 2022-12-02 10:29 UTC

data.datasets.power_plants.wind_farms A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	517
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	38
eloc	309
dl	0
loc	517
rs	9.36
c	0
b	0
f	0

4 Functions

Rating	Name	Size	Complexity
B	insert()	95	4
F	wind_power_states()	232	18
A	generate_map()	48	4
D	generate_wind_farms()	125	12

from matplotlib import pyplot as plt
from shapely.geometry import LineString, MultiPoint, Point, Polygon
import geopandas as gpd
import numpy as np
import pandas as pd

import egon.data.config
from egon.data import db


def insert():
    """Main function. Import power objectives generate results calling the
    functions "generate_wind_farms" and  "wind_power_states".

    Parameters
    ----------
    *No parameters required

    """

    con = db.engine()

    # federal_std has the shapes of the German states
    sql = "SELECT  gen, gf, nuts, geometry FROM boundaries.vg250_lan"
    federal_std = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geometry", crs=4326
    )

    # target_power_df has the expected capacity of each federal state
    sql = "SELECT  carrier, capacity, nuts, scenario_name FROM supply.egon_scenario_capacities"
    target_power_df = pd.read_sql(sql, con)

    # mv_districts has geographic info of medium voltage districts in Germany
    sql = "SELECT geom FROM grid.egon_mv_grid_district"
    mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)

    # Delete all the water bodies from the federal states shapes
    federal_std = federal_std[federal_std["gf"] == 4]
    federal_std.drop(columns=["gf"], inplace=True)
    # Filter the potential expected from wind_onshore
    target_power_df = target_power_df[
        target_power_df["carrier"] == "wind_onshore"
    ]
    target_power_df.set_index("nuts", inplace=True)
    target_power_df["geom"] = Point(0, 0)

    # Join the geometries which belong to the same states
    for std in target_power_df.index:
        df = federal_std[federal_std["nuts"] == std]
        if df.size > 0:
            target_power_df.at[std, "name"] = df["gen"].iat[0]
        else:
            target_power_df.at[std, "name"] = np.nan
        target_power_df.at[std, "geom"] = df.unary_union
    target_power_df = gpd.GeoDataFrame(
        target_power_df, geometry="geom", crs=4326
    )
    target_power_df = target_power_df[target_power_df["capacity"] > 0]
    target_power_df = target_power_df.to_crs(3035)

    #Create the shape for full Germany
    target_power_df.at['DE', 'geom'] = target_power_df['geom'].unary_union
    target_power_df.at['DE', 'name'] = 'Germany'
    # Generate WFs for Germany based on potential areas and existing WFs
    wf_areas, wf_areas_ni = generate_wind_farms()

    # Change the columns "geometry" of this GeoDataFrames
    wf_areas.set_geometry("centroid", inplace=True)
    wf_areas_ni.set_geometry("centroid", inplace=True)

    # Create centroids of mv_districts to apply the clip function
    mv_districts["centroid"] = mv_districts.centroid
    mv_districts.set_geometry("centroid", inplace=True)

    summary_t = pd.DataFrame()
    farms = pd.DataFrame()

    # Fit wind farms scenarions for each one of the states
    for scenario in target_power_df.index:
        state_wf = gpd.clip(wf_areas, target_power_df.at[scenario, "geom"])
        state_wf_ni = gpd.clip(
            wf_areas_ni, target_power_df.at[scenario, "geom"]
        )
        state_mv_districts = gpd.clip(
            mv_districts, target_power_df.at[scenario, "geom"]
        )
        target_power = target_power_df.at[scenario, "capacity"]
        scenario_year = target_power_df.at[scenario, "scenario_name"]
        source = target_power_df.at[scenario, "carrier"]
        fed_state = target_power_df.at[scenario, "name"]
        wind_farms_state, summary_state = wind_power_states(
            state_wf,
            state_wf_ni,
            state_mv_districts,
            target_power,
            scenario_year,
            source,
            fed_state,
        )
        summary_t = summary_t.append(summary_state)
        farms = farms.append(wind_farms_state)

    generate_map()

    return (summary_t, farms)


def generate_wind_farms():
    """Generate wind farms based on existing wind farms.

    Parameters
    ----------
    *No parameters required

    """
    # get config
    cfg = egon.data.config.datasets()["power_plants"]

    # Due to typos in some inputs, some areas of existing wind farms
    # should be discarded using perimeter and area filters
    def filter_current_wf(wf_geometry):
        if wf_geometry.geom_type == "Point":
            return True
        if wf_geometry.geom_type == "Polygon":
            area = wf_geometry.area
            length = wf_geometry.length
            # Filter based on the biggest (# of WT) wind farm
            return (area < 40000000) & (length < 40000)
        if wf_geometry.geom_type == "LineString":
            length = wf_geometry.length
            return length < 1008  # 8 * rotor diameter (8*126m)

    # The function 'wind_farm' returns the connection point of a wind turbine
    def wind_farm(x):
        try:
            return map_ap_wea_farm[x]
        except:
            return np.nan

    # The function 'voltage' returns the voltage level a wind turbine operates
    def voltage(x):
        try:
            return map_ap_wea_voltage[x]
        except:
            return np.nan

    # Connect to the data base
    con = db.engine()
    sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
    # wf_areas has all the potential areas geometries for wind farms
    wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
    # bus has the connection points of the wind farms
    bus = pd.read_csv(cfg["sources"]["mastr_location"], index_col= "MaStRNummer")
    # Drop all the rows without connection point
    bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
    # wea has info of each wind turbine in Germany.
    wea = pd.read_csv(cfg["sources"]["mastr_wind"])

    # Delete all the rows without information about geographical location
    wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
    # Delete all the offshore wind turbines
    wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
    # the variable map_ap_wea_farm have the connection point of all the available wt
    # in the dataframe bus.
    map_ap_wea_farm = {}
    map_ap_wea_voltage = {}

    wea["connection point"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"])
    wea["voltage"] = wea["LokationMastrNummer"].map(
        bus["NetzanschlusspunktMastrNummer"])

    # Create the columns 'geometry' which will have location of each WT in a point type
    wea = gpd.GeoDataFrame(
        wea,
        geometry=gpd.points_from_xy(
            wea["Laengengrad"], wea["Breitengrad"], crs=4326
        ),
    )

    # wf_size storages the number of WT connected to each connection point
    wf_size = wea["connection point"].value_counts()
    # Delete all the connection points with less than 3 WT
    wf_size = wf_size[wf_size >= 3]
    # Filter all the WT which are not part of a wind farm of at least 3 WT
    wea = wea[wea["connection point"].isin(wf_size.index)]
    # current_wfs has all the geometries that represent the existing wind farms
    current_wfs = gpd.GeoDataFrame(
        index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
    )
    for conn_point, wt_location in wea.groupby("connection point"):
        current_wfs.at[conn_point, "geometry"] = MultiPoint(
            wt_location["geometry"].values
        ).convex_hull
        current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]

    current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
    current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
    current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
    # The 'filter_wts' is used to discard atypical values for the current wind farms
    current_wfs["filter2"] = current_wfs["geometry2"].apply(
        lambda x: filter_current_wf(x)
    )

    # Apply the filter based on area and perimeter
    current_wfs = current_wfs[current_wfs["filter2"]]
    current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])

    wf_areas["area [km²]"] = wf_areas.area / 1000000

    # Exclude areas smaller than X m². X was calculated as the area of
    # 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
    min_area = 4 * (0.126 ** 2) * np.sqrt(3)
    wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]

    # Find the centroid of all the suitable potential areas
    wf_areas["centroid"] = wf_areas.centroid

    # find the potential areas that intersects the convex hulls of current wind farms
    # and assign voltage levels
    wf_areas = wf_areas.to_crs(4326)
    for i in wf_areas.index:
        intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
        if intersection.any() == False:
            wf_areas.at[i, "voltage"] = "No Intersection"
        else:
            wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]

    # wf_areas_ni has the potential areas which don't intersect any current wind farm
    wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
    wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
    return wf_areas, wf_areas_ni


def wind_power_states(
    state_wf,
    state_wf_ni,
    state_mv_districts,
    target_power,
    scenario_year,
    source,
    fed_state,
):
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database.

    Parameters
    ----------
    state_wf: geodataframe, mandatory
        gdf containing all the wf in the state created based on existing wf.
    state_wf_ni: geodataframe, mandatory
        potential areas in the the state wich don't intersect any existing wf
    state_mv_districts: geodataframe, mandatory
        gdf containing all the MV/HV substations in the state
    target_power: int, mandatory
        Objective power for a state given in MW
    scenario_year: str, mandatory
        name of the scenario
    source: str, mandatory
        Type of energy genetor. Always "Wind_onshore" for this script.
    fed_state: str, mandatory
        Name of the state where the wind farms will be allocated

    """

    def match_district_se(x):
        for sub in hvmv_substation.index:
            if x["geom"].contains(hvmv_substation.at[sub, "point"]):
                return hvmv_substation.at[sub, "point"]

    con = db.engine()
    sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
    # hvmv_substation has the information about HV transmission lines in Germany
    hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")

    # Set wind potential depending on geographical location
    power_north = 21.05  # MW/km²
    power_south = 16.81  # MW/km²
    # Set a maximum installed capacity to limit the power of big potential areas
    max_power_hv = 120  # in MW
    max_power_mv = 20  # in MW
    # Max distance between WF (connected to MV) and nearest HV substation that
    # allows its connection to HV.
    max_dist_hv = 20000  # in meters

    summary = pd.DataFrame(
        columns=["state", "target", "from existin WF", "MV districts"]
    )

    north = [
        "Schleswig-Holstein",
        "Mecklenburg-Vorpommern",
        "Niedersachsen",
        "Bremen",
        "Hamburg",
    ]

    if fed_state in north:
        state_wf["inst capacity [MW]"] = power_north * state_wf["area [km²]"]
    else:
        state_wf["inst capacity [MW]"] = power_south * state_wf["area [km²]"]

    # Divide selected areas based on voltage of connection points
    wf_mv = state_wf[
        (state_wf["voltage"] != "Hochspannung")
        & (state_wf["voltage"] != "Hoechstspannung")
        & (state_wf["voltage"] != "UmspannungZurHochspannung")
    ]

    wf_hv = state_wf[
        (state_wf["voltage"] == "Hochspannung")
        | (state_wf["voltage"] == "Hoechstspannung")
        | (state_wf["voltage"] == "UmspannungZurHochspannung")
    ]

    # Wind farms connected to MV network will be connected to HV network if the distance
    # to the closest HV substation is =< max_dist_hv, and the installed capacity
    # is bigger than max_power_mv
    hvmv_substation = hvmv_substation.to_crs(3035)
    hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
        lambda x: int(x.split(";")[0])
    )
    hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
    hv_substations = hv_substations.unary_union  # join all the hv_substations
    wf_mv["dist_to_HV"] = (
        state_wf["geom"].to_crs(3035).distance(hv_substations)
    )
    wf_mv_to_hv = wf_mv[
        (wf_mv["dist_to_HV"] <= max_dist_hv)
        & (wf_mv["inst capacity [MW]"] >= max_power_mv)
    ]
    wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
    wf_mv_to_hv["voltage"] = "Hochspannung"

    wf_hv = wf_hv.append(wf_mv_to_hv)
    wf_mv = wf_mv[
        (wf_mv["dist_to_HV"] > max_dist_hv)
        | (wf_mv["inst capacity [MW]"] < max_power_mv)
    ]
    wf_mv = wf_mv.drop(columns=["dist_to_HV"])

    wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_hv else max_power_hv
    )

    wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
        lambda x: x if x < max_power_mv else max_power_mv
    )

    wind_farms = wf_hv.append(wf_mv)

    # Adjust the total installed capacity to the scenario
    total_wind_power = (
        wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
    )
    if total_wind_power > target_power:
        scale_factor = target_power / total_wind_power
        wf_mv["inst capacity [MW]"] = (
            wf_mv["inst capacity [MW]"] * scale_factor
        )
        wf_hv["inst capacity [MW]"] = (
            wf_hv["inst capacity [MW]"] * scale_factor
        )
        wind_farms = wf_hv.append(wf_mv)
        summary = summary.append(
            {
                "state": fed_state,
                "target": target_power,
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
                "MV districts": 0,
            },
            ignore_index=True,
        )
    else:
        extra_wf = state_mv_districts.copy()
        extra_wf = extra_wf.drop(columns=["centroid"])
        # the column centroid has the coordinates of the substation corresponting
        # to each mv_grid_district
        extra_wf["centroid"] = extra_wf.apply(match_district_se, axis=1)
        extra_wf = extra_wf.set_geometry("centroid")
        extra_wf["area [km²]"] = 0.0
        for district in extra_wf.index:
            try:
                pot_area_district = gpd.clip(
                    state_wf_ni, extra_wf.at[district, "geom"]
                )
                extra_wf.at[district, "area [km²]"] = pot_area_district[
                    "area [km²]"
                ].sum()
            except:
                print(district)
        extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
        total_new_area = extra_wf["area [km²]"].sum()
        scale_factor = (target_power - total_wind_power) / total_new_area
        extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
        extra_wf["voltage"] = "Hochspannung"
        summary = summary.append(
            {
                "state": fed_state,
                "target": target_power,
                "from existin WF": wind_farms["inst capacity [MW]"].sum(),
                "MV districts": extra_wf["inst capacity [MW]"].sum(),
            },
            ignore_index=True,
        )
        wind_farms = wind_farms.append(extra_wf, ignore_index=True)

    # Use Definition of thresholds for voltage level assignment
    wind_farms["voltage_level"] = 0
    for i in wind_farms.index:
        try:
            if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
                wind_farms.at[i, "voltage_level"] = 5
                continue
            if wind_farms.at[i, "inst capacity [MW]"] < 20:
                wind_farms.at[i, "voltage_level"] = 4
                continue
            if wind_farms.at[i, "inst capacity [MW]"] >= 20:
                wind_farms.at[i, "voltage_level"] = 3
                continue
        except:
            print(i)

    # Look for the maximum id in the table egon_power_plants
    sql = "SELECT MAX(id) FROM supply.egon_power_plants"
    max_id = pd.read_sql(sql, con)
    max_id = max_id["max"].iat[0]
    if max_id == None:
        wind_farm_id = 1
    else:
        wind_farm_id = int(max_id + 1)

    # write_table in egon-data database:

    # Copy relevant columns from wind_farms
    insert_wind_farms = wind_farms[
        ["inst capacity [MW]", "voltage_level", "centroid"]
    ]

    # Set static column values
    insert_wind_farms["carrier"] = source
    insert_wind_farms["scenario"] = scenario_year

    # Change name and crs of geometry column
    insert_wind_farms = (
        insert_wind_farms.rename(
            {"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
        )
        .set_geometry("geom")
        .to_crs(4326)
    )

    # Reset index
    insert_wind_farms.index = pd.RangeIndex(
        start=wind_farm_id,
        stop=wind_farm_id + len(insert_wind_farms),
        name="id",
    )

    # Insert into database
    insert_wind_farms.reset_index().to_postgis(
        "egon_power_plants",
        schema="supply",
        con=db.engine(),
        if_exists="append",
    )
    return wind_farms, summary


def generate_map():
    """Generates a map with the position of all the wind farms

    Parameters
    ----------
    *No parameters required

    """
    con = db.engine()

    # Import wind farms from egon-data
    sql = """SELECT  carrier, el_capacity, geom, scenario FROM supply.egon_power_plants
    WHERE carrier = 'wind_onshore'"""
    wind_farms_t = gpd.GeoDataFrame.from_postgis(
        sql, con, geom_col="geom", crs=4326
    )
    wind_farms_t = wind_farms_t.to_crs(3035)

    for scenario in wind_farms_t.scenario.unique():
        wind_farms = wind_farms_t[wind_farms_t['scenario'] == scenario]
        # mv_districts has geographic info of medium voltage districts in Germany
        sql = "SELECT geom FROM grid.egon_mv_grid_district"
        mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
        mv_districts = mv_districts.to_crs(3035)

        mv_districts["power"] = 0.0
        for std in mv_districts.index:
            try:
                mv_districts.at[std, "power"] = gpd.clip(
                    wind_farms, mv_districts.at[std, "geom"]
                ).el_capacity.sum()
            except:
                print(std)

        fig, ax = plt.subplots(1, 1)
        mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
        mv_districts.plot(
            ax=ax,
            column="power",
            cmap="magma_r",
            legend=True,
            legend_kwds={
                "label": "Installed capacity in MW",
                "orientation": "vertical",
            },
        )
        plt.savefig(f'wind_farms_{scenario}.png', dpi=300)
    return 0


1			from matplotlib import pyplot as plt
2			from shapely.geometry import LineString, MultiPoint, Point, Polygon
3			import geopandas as gpd
4			import numpy as np
5			import pandas as pd
6
7			import egon.data.config
8			from egon.data import db
9
10
11			def insert():
12			"""Main function. Import power objectives generate results calling the
13			functions "generate_wind_farms" and "wind_power_states".
14
15			Parameters
16			----------
17			*No parameters required
18
19			"""
20
21			con = db.engine()
22
23			# federal_std has the shapes of the German states
24			sql = "SELECT gen, gf, nuts, geometry FROM boundaries.vg250_lan"
25			federal_std = gpd.GeoDataFrame.from_postgis(
26			sql, con, geom_col="geometry", crs=4326
27			)
28
29			# target_power_df has the expected capacity of each federal state
30			sql = "SELECT carrier, capacity, nuts, scenario_name FROM supply.egon_scenario_capacities"
31			target_power_df = pd.read_sql(sql, con)
32
33			# mv_districts has geographic info of medium voltage districts in Germany
34			sql = "SELECT geom FROM grid.egon_mv_grid_district"
35			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
36
37			# Delete all the water bodies from the federal states shapes
38			federal_std = federal_std[federal_std["gf"] == 4]
39			federal_std.drop(columns=["gf"], inplace=True)
40			# Filter the potential expected from wind_onshore
41			target_power_df = target_power_df[
42			target_power_df["carrier"] == "wind_onshore"
43			]
44			target_power_df.set_index("nuts", inplace=True)
45			target_power_df["geom"] = Point(0, 0)
46
47			# Join the geometries which belong to the same states
48			for std in target_power_df.index:
49			df = federal_std[federal_std["nuts"] == std]
50			if df.size > 0:
51			target_power_df.at[std, "name"] = df["gen"].iat[0]
52			else:
53			target_power_df.at[std, "name"] = np.nan
54			target_power_df.at[std, "geom"] = df.unary_union
55			target_power_df = gpd.GeoDataFrame(
56			target_power_df, geometry="geom", crs=4326
57			)
58			target_power_df = target_power_df[target_power_df["capacity"] > 0]
59			target_power_df = target_power_df.to_crs(3035)
60
61			#Create the shape for full Germany
62			target_power_df.at['DE', 'geom'] = target_power_df['geom'].unary_union
63			target_power_df.at['DE', 'name'] = 'Germany'
64			# Generate WFs for Germany based on potential areas and existing WFs
65			wf_areas, wf_areas_ni = generate_wind_farms()
66
67			# Change the columns "geometry" of this GeoDataFrames
68			wf_areas.set_geometry("centroid", inplace=True)
69			wf_areas_ni.set_geometry("centroid", inplace=True)
70
71			# Create centroids of mv_districts to apply the clip function
72			mv_districts["centroid"] = mv_districts.centroid
73			mv_districts.set_geometry("centroid", inplace=True)
74
75			summary_t = pd.DataFrame()
76			farms = pd.DataFrame()
77
78			# Fit wind farms scenarions for each one of the states
79			for scenario in target_power_df.index:
80			state_wf = gpd.clip(wf_areas, target_power_df.at[scenario, "geom"])
81			state_wf_ni = gpd.clip(
82			wf_areas_ni, target_power_df.at[scenario, "geom"]
83			)
84			state_mv_districts = gpd.clip(
85			mv_districts, target_power_df.at[scenario, "geom"]
86			)
87			target_power = target_power_df.at[scenario, "capacity"]
88			scenario_year = target_power_df.at[scenario, "scenario_name"]
89			source = target_power_df.at[scenario, "carrier"]
90			fed_state = target_power_df.at[scenario, "name"]
91			wind_farms_state, summary_state = wind_power_states(
92			state_wf,
93			state_wf_ni,
94			state_mv_districts,
95			target_power,
96			scenario_year,
97			source,
98			fed_state,
99			)
100			summary_t = summary_t.append(summary_state)
101			farms = farms.append(wind_farms_state)
102
103			generate_map()
104
105			return (summary_t, farms)
106
107
108			def generate_wind_farms():
109			"""Generate wind farms based on existing wind farms.
110
111			Parameters
112			----------
113			*No parameters required
114
115			"""
116			# get config
117			cfg = egon.data.config.datasets()["power_plants"]
118
119			# Due to typos in some inputs, some areas of existing wind farms
120			# should be discarded using perimeter and area filters
121			def filter_current_wf(wf_geometry):
122			if wf_geometry.geom_type == "Point":
123			return True
124			if wf_geometry.geom_type == "Polygon":
125			area = wf_geometry.area
126			length = wf_geometry.length
127			# Filter based on the biggest (# of WT) wind farm
128			return (area < 40000000) & (length < 40000)
129			if wf_geometry.geom_type == "LineString":
130			length = wf_geometry.length
131			return length < 1008 # 8 * rotor diameter (8*126m)
132
133			# The function 'wind_farm' returns the connection point of a wind turbine
134			def wind_farm(x):
135			try:
136			return map_ap_wea_farm[x]
137			except:
138			return np.nan
139
140			# The function 'voltage' returns the voltage level a wind turbine operates
141			def voltage(x):
142			try:
143			return map_ap_wea_voltage[x]
144			except:
145			return np.nan
146
147			# Connect to the data base
148			con = db.engine()
149			sql = "SELECT geom FROM supply.egon_re_potential_area_wind"
150			# wf_areas has all the potential areas geometries for wind farms
151			wf_areas = gpd.GeoDataFrame.from_postgis(sql, con)
152			# bus has the connection points of the wind farms
153			bus = pd.read_csv(cfg["sources"]["mastr_location"], index_col= "MaStRNummer")
154			# Drop all the rows without connection point
155			bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True)
156			# wea has info of each wind turbine in Germany.
157			wea = pd.read_csv(cfg["sources"]["mastr_wind"])
158
159			# Delete all the rows without information about geographical location
160			wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))]
161			# Delete all the offshore wind turbines
162			wea = wea.loc[wea["Lage"] == "Windkraft an Land"]
163			# the variable map_ap_wea_farm have the connection point of all the available wt
164			# in the dataframe bus.
165			map_ap_wea_farm = {}
166			map_ap_wea_voltage = {}
167
168			wea["connection point"] = wea["LokationMastrNummer"].map(
169			bus["NetzanschlusspunktMastrNummer"])
170			wea["voltage"] = wea["LokationMastrNummer"].map(
171			bus["NetzanschlusspunktMastrNummer"])
172
173			# Create the columns 'geometry' which will have location of each WT in a point type
174			wea = gpd.GeoDataFrame(
175			wea,
176			geometry=gpd.points_from_xy(
177			wea["Laengengrad"], wea["Breitengrad"], crs=4326
178			),
179			)
180
181			# wf_size storages the number of WT connected to each connection point
182			wf_size = wea["connection point"].value_counts()
183			# Delete all the connection points with less than 3 WT
184			wf_size = wf_size[wf_size >= 3]
185			# Filter all the WT which are not part of a wind farm of at least 3 WT
186			wea = wea[wea["connection point"].isin(wf_size.index)]
187			# current_wfs has all the geometries that represent the existing wind farms
188			current_wfs = gpd.GeoDataFrame(
189			index=wf_size.index, crs=4326, columns=["geometry", "voltage"]
190			)
191			for conn_point, wt_location in wea.groupby("connection point"):
192			current_wfs.at[conn_point, "geometry"] = MultiPoint(
193			wt_location["geometry"].values
194			).convex_hull
195			current_wfs.at[conn_point, "voltage"] = wt_location["voltage"].iat[0]
196
197			current_wfs["geometry2"] = current_wfs["geometry"].to_crs(3035)
198			current_wfs["area"] = current_wfs["geometry2"].apply(lambda x: x.area)
199			current_wfs["length"] = current_wfs["geometry2"].apply(lambda x: x.length)
200			# The 'filter_wts' is used to discard atypical values for the current wind farms
201			current_wfs["filter2"] = current_wfs["geometry2"].apply(
202			lambda x: filter_current_wf(x)
203			)
204
205			# Apply the filter based on area and perimeter
206			current_wfs = current_wfs[current_wfs["filter2"]]
207			current_wfs = current_wfs.drop(columns=["geometry2", "filter2"])
208
209			wf_areas["area [km²]"] = wf_areas.area / 1000000
210
211			# Exclude areas smaller than X m². X was calculated as the area of
212			# 3 WT in the corners of an equilateral triangle with l = 4*rotor_diameter
213			min_area = 4 * (0.126 ** 2) * np.sqrt(3)
214			wf_areas = wf_areas[wf_areas["area [km²]"] > min_area]
215
216			# Find the centroid of all the suitable potential areas
217			wf_areas["centroid"] = wf_areas.centroid
218
219			# find the potential areas that intersects the convex hulls of current wind farms
220			# and assign voltage levels
221			wf_areas = wf_areas.to_crs(4326)
222			for i in wf_areas.index:
223			intersection = current_wfs.intersects(wf_areas.at[i, "geom"])
224			if intersection.any() == False:
225			wf_areas.at[i, "voltage"] = "No Intersection"
226			else:
227			wf_areas.at[i, "voltage"] = current_wfs[intersection].voltage[0]
228
229			# wf_areas_ni has the potential areas which don't intersect any current wind farm
230			wf_areas_ni = wf_areas[wf_areas["voltage"] == "No Intersection"]
231			wf_areas = wf_areas[wf_areas["voltage"] != "No Intersection"]
232			return wf_areas, wf_areas_ni
233
234
235			def wind_power_states(
236			state_wf,
237			state_wf_ni,
238			state_mv_districts,
239			target_power,
240			scenario_year,
241			source,
242			fed_state,
243			):
244			"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database.
245
246			Parameters
247			----------
248			state_wf: geodataframe, mandatory
249			gdf containing all the wf in the state created based on existing wf.
250			state_wf_ni: geodataframe, mandatory
251			potential areas in the the state wich don't intersect any existing wf
252			state_mv_districts: geodataframe, mandatory
253			gdf containing all the MV/HV substations in the state
254			target_power: int, mandatory
255			Objective power for a state given in MW
256			scenario_year: str, mandatory
257			name of the scenario
258			source: str, mandatory
259			Type of energy genetor. Always "Wind_onshore" for this script.
260			fed_state: str, mandatory
261			Name of the state where the wind farms will be allocated
262
263			"""
264
265			def match_district_se(x):
266			for sub in hvmv_substation.index:
267			if x["geom"].contains(hvmv_substation.at[sub, "point"]):
268			return hvmv_substation.at[sub, "point"]
269
270			con = db.engine()
271			sql = "SELECT point, voltage FROM grid.egon_hvmv_substation"
272			# hvmv_substation has the information about HV transmission lines in Germany
273			hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point")
274
275			# Set wind potential depending on geographical location
276			power_north = 21.05 # MW/km²
277			power_south = 16.81 # MW/km²
278			# Set a maximum installed capacity to limit the power of big potential areas
279			max_power_hv = 120 # in MW
280			max_power_mv = 20 # in MW
281			# Max distance between WF (connected to MV) and nearest HV substation that
282			# allows its connection to HV.
283			max_dist_hv = 20000 # in meters
284
285			summary = pd.DataFrame(
286			columns=["state", "target", "from existin WF", "MV districts"]
287			)
288
289			north = [
290			"Schleswig-Holstein",
291			"Mecklenburg-Vorpommern",
292			"Niedersachsen",
293			"Bremen",
294			"Hamburg",
295			]
296
297			if fed_state in north:
298			state_wf["inst capacity [MW]"] = power_north * state_wf["area [km²]"]
299			else:
300			state_wf["inst capacity [MW]"] = power_south * state_wf["area [km²]"]
301
302			# Divide selected areas based on voltage of connection points
303			wf_mv = state_wf[
304			(state_wf["voltage"] != "Hochspannung")
305			& (state_wf["voltage"] != "Hoechstspannung")
306			& (state_wf["voltage"] != "UmspannungZurHochspannung")
307			]
308
309			wf_hv = state_wf[
310			(state_wf["voltage"] == "Hochspannung")
311			\| (state_wf["voltage"] == "Hoechstspannung")
312			\| (state_wf["voltage"] == "UmspannungZurHochspannung")
313			]
314
315			# Wind farms connected to MV network will be connected to HV network if the distance
316			# to the closest HV substation is =< max_dist_hv, and the installed capacity
317			# is bigger than max_power_mv
318			hvmv_substation = hvmv_substation.to_crs(3035)
319			hvmv_substation["voltage"] = hvmv_substation["voltage"].apply(
320			lambda x: int(x.split(";")[0])
321			)
322			hv_substations = hvmv_substation[hvmv_substation["voltage"] >= 110000]
323			hv_substations = hv_substations.unary_union # join all the hv_substations
324			wf_mv["dist_to_HV"] = (
325			state_wf["geom"].to_crs(3035).distance(hv_substations)
326			)
327			wf_mv_to_hv = wf_mv[
328			(wf_mv["dist_to_HV"] <= max_dist_hv)
329			& (wf_mv["inst capacity [MW]"] >= max_power_mv)
330			]
331			wf_mv_to_hv = wf_mv_to_hv.drop(columns=["dist_to_HV"])
332			wf_mv_to_hv["voltage"] = "Hochspannung"
333
334			wf_hv = wf_hv.append(wf_mv_to_hv)
335			wf_mv = wf_mv[
336			(wf_mv["dist_to_HV"] > max_dist_hv)
337			\| (wf_mv["inst capacity [MW]"] < max_power_mv)
338			]
339			wf_mv = wf_mv.drop(columns=["dist_to_HV"])
340
341			wf_hv["inst capacity [MW]"] = wf_hv["inst capacity [MW]"].apply(
342			lambda x: x if x < max_power_hv else max_power_hv
343			)
344
345			wf_mv["inst capacity [MW]"] = wf_mv["inst capacity [MW]"].apply(
346			lambda x: x if x < max_power_mv else max_power_mv
347			)
348
349			wind_farms = wf_hv.append(wf_mv)
350
351			# Adjust the total installed capacity to the scenario
352			total_wind_power = (
353			wf_hv["inst capacity [MW]"].sum() + wf_mv["inst capacity [MW]"].sum()
354			)
355			if total_wind_power > target_power:
356			scale_factor = target_power / total_wind_power
357			wf_mv["inst capacity [MW]"] = (
358			wf_mv["inst capacity [MW]"] * scale_factor
359			)
360			wf_hv["inst capacity [MW]"] = (
361			wf_hv["inst capacity [MW]"] * scale_factor
362			)
363			wind_farms = wf_hv.append(wf_mv)
364			summary = summary.append(
365			{
366			"state": fed_state,
367			"target": target_power,
368			"from existin WF": wind_farms["inst capacity [MW]"].sum(),
369			"MV districts": 0,
370			},
371			ignore_index=True,
372			)
373			else:
374			extra_wf = state_mv_districts.copy()
375			extra_wf = extra_wf.drop(columns=["centroid"])
376			# the column centroid has the coordinates of the substation corresponting
377			# to each mv_grid_district
378			extra_wf["centroid"] = extra_wf.apply(match_district_se, axis=1)
379			extra_wf = extra_wf.set_geometry("centroid")
380			extra_wf["area [km²]"] = 0.0
381			for district in extra_wf.index:
382			try:
383			pot_area_district = gpd.clip(
384			state_wf_ni, extra_wf.at[district, "geom"]
385			)
386			extra_wf.at[district, "area [km²]"] = pot_area_district[
387			"area [km²]"
388			].sum()
389			except:
390			print(district)
391			extra_wf = extra_wf[extra_wf["area [km²]"] != 0]
392			total_new_area = extra_wf["area [km²]"].sum()
393			scale_factor = (target_power - total_wind_power) / total_new_area
394			extra_wf["inst capacity [MW]"] = extra_wf["area [km²]"] * scale_factor
395			extra_wf["voltage"] = "Hochspannung"
396			summary = summary.append(
397			{
398			"state": fed_state,
399			"target": target_power,
400			"from existin WF": wind_farms["inst capacity [MW]"].sum(),
401			"MV districts": extra_wf["inst capacity [MW]"].sum(),
402			},
403			ignore_index=True,
404			)
405			wind_farms = wind_farms.append(extra_wf, ignore_index=True)
406
407			# Use Definition of thresholds for voltage level assignment
408			wind_farms["voltage_level"] = 0
409			for i in wind_farms.index:
410			try:
411			if wind_farms.at[i, "inst capacity [MW]"] < 5.5:
412			wind_farms.at[i, "voltage_level"] = 5
413			continue
414			if wind_farms.at[i, "inst capacity [MW]"] < 20:
415			wind_farms.at[i, "voltage_level"] = 4
416			continue
417			if wind_farms.at[i, "inst capacity [MW]"] >= 20:
418			wind_farms.at[i, "voltage_level"] = 3
419			continue
420			except:
421			print(i)
422
423			# Look for the maximum id in the table egon_power_plants
424			sql = "SELECT MAX(id) FROM supply.egon_power_plants"
425			max_id = pd.read_sql(sql, con)
426			max_id = max_id["max"].iat[0]
427			if max_id == None:
428			wind_farm_id = 1
429			else:
430			wind_farm_id = int(max_id + 1)
431
432			# write_table in egon-data database:
433
434			# Copy relevant columns from wind_farms
435			insert_wind_farms = wind_farms[
436			["inst capacity [MW]", "voltage_level", "centroid"]
437			]
438
439			# Set static column values
440			insert_wind_farms["carrier"] = source
441			insert_wind_farms["scenario"] = scenario_year
442
443			# Change name and crs of geometry column
444			insert_wind_farms = (
445			insert_wind_farms.rename(
446			{"centroid": "geom", "inst capacity [MW]": "el_capacity"}, axis=1
447			)
448			.set_geometry("geom")
449			.to_crs(4326)
450			)
451
452			# Reset index
453			insert_wind_farms.index = pd.RangeIndex(
454			start=wind_farm_id,
455			stop=wind_farm_id + len(insert_wind_farms),
456			name="id",
457			)
458
459			# Insert into database
460			insert_wind_farms.reset_index().to_postgis(
461			"egon_power_plants",
462			schema="supply",
463			con=db.engine(),
464			if_exists="append",
465			)
466			return wind_farms, summary
467
468
469			def generate_map():
470			"""Generates a map with the position of all the wind farms
471
472			Parameters
473			----------
474			*No parameters required
475
476			"""
477			con = db.engine()
478
479			# Import wind farms from egon-data
480			sql = """SELECT carrier, el_capacity, geom, scenario FROM supply.egon_power_plants
481			WHERE carrier = 'wind_onshore'"""
482			wind_farms_t = gpd.GeoDataFrame.from_postgis(
483			sql, con, geom_col="geom", crs=4326
484			)
485			wind_farms_t = wind_farms_t.to_crs(3035)
486
487			for scenario in wind_farms_t.scenario.unique():
488			wind_farms = wind_farms_t[wind_farms_t['scenario'] == scenario]
489			# mv_districts has geographic info of medium voltage districts in Germany
490			sql = "SELECT geom FROM grid.egon_mv_grid_district"
491			mv_districts = gpd.GeoDataFrame.from_postgis(sql, con)
492			mv_districts = mv_districts.to_crs(3035)
493
494			mv_districts["power"] = 0.0
495			for std in mv_districts.index:
496			try:
497			mv_districts.at[std, "power"] = gpd.clip(
498			wind_farms, mv_districts.at[std, "geom"]
499			).el_capacity.sum()
500			except:
501			print(std)
502
503			fig, ax = plt.subplots(1, 1)
504			mv_districts.geom.plot(linewidth=0.2, ax=ax, color="black")
505			mv_districts.plot(
506			ax=ax,
507			column="power",
508			cmap="magma_r",
509			legend=True,
510			legend_kwds={
511			"label": "Installed capacity in MW",
512			"orientation": "vertical",
513			},
514			)
515			plt.savefig(f'wind_farms_{scenario}.png', dpi=300)
516			return 0
517

openego / eGon-data

Pull Request — dev (#1053)

data.datasets.power_plants.wind_farms A

Complexity

Size/Duplication

Importance

4 Functions

Duplication Side-by-Side

Filter issues like