Passed
Pull Request — dev (#1054)
by
unknown
01:31
created

data.datasets.power_plants.mastr.import_mastr()   B

Complexity

Conditions 4

Size

Total Lines 175
Code Lines 127

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 127
dl 0
loc 175
rs 7
c 0
b 0
f 0
cc 4
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""Import MaStR dataset and write to DB tables
2
3
Data dump from Marktstammdatenregister (2022-11-17) is imported into the
4
database. Only some technologies are taken into account and written to the
5
following tables:
6
7
* PV: table `supply.egon_power_plants_pv`
8
* wind turbines: table `supply.egon_power_plants_wind`
9
* biomass/biogas plants: table `supply.egon_power_plants_biomass`
10
* hydro plants: table `supply.egon_power_plants_hydro`
11
12
Empty data in columns `voltage_level`, `bus_id` (all tables), and `plant_type`
13
(supply.egon_power_plants_hydro) is indicated by the value -1.
14
15
The data is used especially for the generation of status quo grids by ding0.
16
"""
17
from geoalchemy2 import Geometry
18
from sqlalchemy import (
19
    Boolean,
20
    Column,
21
    DateTime,
22
    Float,
23
    Integer,
24
    Sequence,
25
    String,
26
)
27
from sqlalchemy.ext.declarative import declarative_base
28
import geopandas as gpd
29
import pandas as pd
30
31
from egon.data import db
32
import egon.data.config
33
34
Base = declarative_base()
35
36
TESTMODE_OFF = (
37
    egon.data.config.settings()["egon-data"]["--dataset-boundary"]
38
    == "Everything"
39
)
40
41
42
class EgonPowerPlantsPv(Base):
43
    __tablename__ = "egon_power_plants_pv"
44
    __table_args__ = {"schema": "supply"}
45
46
    id = Column(Integer, Sequence("pp_pv_seq"), primary_key=True)
47
    bus_id = Column(Integer, nullable=True)  # Grid district id
48
    gens_id = Column(String, nullable=True)  # EinheitMastrNummer
49
50
    status = Column(String, nullable=True)  # EinheitBetriebsstatus
51
    commissioning_date = Column(DateTime, nullable=True)  # Inbetriebnahmedatum
52
    postcode = Column(String(5), nullable=True)  # Postleitzahl
53
    city = Column(String(50), nullable=True)  # Ort
54
    federal_state = Column(String(31), nullable=True)  # Bundesland
55
56
    site_type = Column(String(69), nullable=True)  # Lage
57
    usage_sector = Column(String(36), nullable=True)  # Nutzungsbereich
58
    orientation_primary = Column(String(11), nullable=True)  # Hauptausrichtung
59
    orientation_primary_angle = Column(
60
        String(18), nullable=True
61
    )  # HauptausrichtungNeigungswinkel
62
    orientation_secondary = Column(
63
        String(11), nullable=True
64
    )  # Nebenausrichtung
65
    orientation_secondary_angle = Column(
66
        String(18), nullable=True
67
    )  # NebenausrichtungNeigungswinkel
68
    orientation_uniform = Column(
69
        Boolean, nullable=True
70
    )  # EinheitlicheAusrichtungUndNeigungswinkel
71
    module_count = Column(Float, nullable=True)  # AnzahlModule
72
73
    capacity = Column(Float, nullable=True)  # Nettonennleistung
74
    capacity_inverter = Column(
75
        Float, nullable=True
76
    )  # ZugeordneteWirkleistungWechselrichter in MW
77
    feedin_type = Column(String(47), nullable=True)  # Einspeisungsart
78
    voltage_level = Column(Integer, nullable=True)
79
80
    geom = Column(Geometry("POINT", 4326), index=True, nullable=True)
81
82
83
class EgonPowerPlantsWind(Base):
84
    __tablename__ = "egon_power_plants_wind"
85
    __table_args__ = {"schema": "supply"}
86
87
    id = Column(Integer, Sequence("pp_wind_seq"), primary_key=True)
88
    bus_id = Column(Integer, nullable=True)  # Grid district id
89
    gens_id = Column(String, nullable=True)  # EinheitMastrNummer
90
91
    status = Column(String, nullable=True)  # EinheitBetriebsstatus
92
    commissioning_date = Column(DateTime, nullable=True)  # Inbetriebnahmedatum
93
    postcode = Column(String(5), nullable=True)  # Postleitzahl
94
    city = Column(String(50), nullable=True)  # Ort
95
    federal_state = Column(String(31), nullable=True)  # Bundesland
96
97
    site_type = Column(String(17), nullable=True)  # Lage
98
    manufacturer_name = Column(String(100), nullable=True)  # Hersteller
99
    type_name = Column(String(100), nullable=True)  # Typenbezeichnung
100
    hub_height = Column(Float, nullable=True)  # Nabenhoehe
101
    rotor_diameter = Column(Float, nullable=True)  # Rotordurchmesser
102
103
    capacity = Column(Float, nullable=True)  # Nettonennleistung
104
    feedin_type = Column(String(47), nullable=True)  # Einspeisungsart
105
    voltage_level = Column(Integer, nullable=True)
106
107
    geom = Column(Geometry("POINT", 4326), index=True, nullable=True)
108
109
110
class EgonPowerPlantsBiomass(Base):
111
    __tablename__ = "egon_power_plants_biomass"
112
    __table_args__ = {"schema": "supply"}
113
114
    id = Column(Integer, Sequence("pp_biomass_seq"), primary_key=True)
115
    bus_id = Column(Integer, nullable=True)  # Grid district id
116
    gens_id = Column(String, nullable=True)  # EinheitMastrNummer
117
118
    status = Column(String, nullable=True)  # EinheitBetriebsstatus
119
    commissioning_date = Column(DateTime, nullable=True)  # Inbetriebnahmedatum
120
    postcode = Column(String(5), nullable=True)  # Postleitzahl
121
    city = Column(String(50), nullable=True)  # Ort
122
    federal_state = Column(String(31), nullable=True)  # Bundesland
123
124
    technology = Column(String(45), nullable=True)  # Technologie
125
    fuel_name = Column(String(52), nullable=True)  # Hauptbrennstoff
126
    fuel_type = Column(String(19), nullable=True)  # Biomasseart
127
128
    capacity = Column(Float, nullable=True)  # Nettonennleistung
129
    th_capacity = Column(Float, nullable=True)  # ThermischeNutzleistung
130
    feedin_type = Column(String(47), nullable=True)  # Einspeisungsart
131
    voltage_level = Column(Integer, nullable=True)
132
133
    geom = Column(Geometry("POINT", 4326), index=True, nullable=True)
134
135
136
class EgonPowerPlantsHydro(Base):
137
    __tablename__ = "egon_power_plants_hydro"
138
    __table_args__ = {"schema": "supply"}
139
140
    id = Column(Integer, Sequence("pp_hydro_seq"), primary_key=True)
141
    bus_id = Column(Integer, nullable=True)  # Grid district id
142
    gens_id = Column(String, nullable=True)  # EinheitMastrNummer
143
144
    status = Column(String, nullable=True)  # EinheitBetriebsstatus
145
    commissioning_date = Column(DateTime, nullable=True)  # Inbetriebnahmedatum
146
    postcode = Column(String(5), nullable=True)  # Postleitzahl
147
    city = Column(String(50), nullable=True)  # Ort
148
    federal_state = Column(String(31), nullable=True)  # Bundesland
149
150
    plant_type = Column(String(39), nullable=True)  # ArtDerWasserkraftanlage
151
    water_origin = Column(String(20), nullable=True)  # ArtDesZuflusses
152
153
    capacity = Column(Float, nullable=True)  # Nettonennleistung
154
    feedin_type = Column(String(47), nullable=True)  # Einspeisungsart
155
    voltage_level = Column(Integer, nullable=True)
156
157
    geom = Column(Geometry("POINT", 4326), index=True, nullable=True)
158
159
160
def import_mastr() -> None:
161
    """Import MaStR data into database"""
162
    engine = db.engine()
163
    cfg = egon.data.config.datasets()["power_plants"]
164
165
    cols_mapping = {
166
        "all": {
167
            "EinheitMastrNummer": "gens_id",
168
            "EinheitBetriebsstatus": "status",
169
            "Inbetriebnahmedatum": "commissioning_date",
170
            "Postleitzahl": "postcode",
171
            "Ort": "city",
172
            "Bundesland": "federal_state",
173
            "Nettonennleistung": "capacity",
174
            "Einspeisungsart": "feedin_type",
175
        },
176
        "pv": {
177
            "Lage": "site_type",
178
            "Nutzungsbereich": "usage_sector",
179
            "Hauptausrichtung": "orientation_primary",
180
            "HauptausrichtungNeigungswinkel": "orientation_primary_angle",
181
            "Nebenausrichtung": "orientation_secondary",
182
            "NebenausrichtungNeigungswinkel": "orientation_secondary_angle",
183
            "EinheitlicheAusrichtungUndNeigungswinkel": "orientation_uniform",
184
            "AnzahlModule": "module_count",
185
            "zugeordneteWirkleistungWechselrichter": "capacity_inverter",
186
        },
187
        "wind": {
188
            "Lage": "site_type",
189
            "Hersteller": "manufacturer_name",
190
            "Typenbezeichnung": "type_name",
191
            "Nabenhoehe": "hub_height",
192
            "Rotordurchmesser": "rotor_diameter",
193
        },
194
        "biomass": {
195
            "Technologie": "technology",
196
            "Hauptbrennstoff": "fuel_name",
197
            "Biomasseart": "fuel_type",
198
            "ThermischeNutzleistung": "th_capacity",
199
        },
200
        "hydro": {
201
            "ArtDerWasserkraftanlage": "plant_type",
202
            "ArtDesZuflusses": "water_origin",
203
        },
204
    }
205
206
    source_files = {
207
        "pv": cfg["sources"]["mastr_pv"],
208
        "wind": cfg["sources"]["mastr_wind"],
209
        "biomass": cfg["sources"]["mastr_biomass"],
210
        "hydro": cfg["sources"]["mastr_hydro"],
211
    }
212
    target_tables = {
213
        "pv": EgonPowerPlantsPv,
214
        "wind": EgonPowerPlantsWind,
215
        "biomass": EgonPowerPlantsBiomass,
216
        "hydro": EgonPowerPlantsHydro,
217
    }
218
    vlevel_mapping = {
219
        "Höchstspannung": 1,
220
        "UmspannungZurHochspannung": 2,
221
        "Hochspannung": 3,
222
        "UmspannungZurMittelspannung": 4,
223
        "Mittelspannung": 5,
224
        "UmspannungZurNiederspannung": 6,
225
        "Niederspannung": 7,
226
    }
227
228
    # import locations
229
    locations = pd.read_csv(cfg["sources"]["mastr_location"], index_col=None)
230
231
    # import grid districts
232
    mv_grid_districts = db.select_geodataframe(
233
        f"""
234
        SELECT * FROM {cfg['sources']['egon_mv_grid_district']}
235
        """,
236
        epsg=4326,
237
    )
238
239
    # import units
240
    technologies = ["pv", "wind", "biomass", "hydro"]
241
    for tech in technologies:
242
        # read units
243
        print(f"Importing MaStR dataset: {tech}:")
244
        print("  Reading CSV and filtering data...")
245
        units = pd.read_csv(
246
            source_files[tech],
247
            usecols=(
248
                ["LokationMastrNummer", "Laengengrad", "Breitengrad", "Land"]
249
                + list(cols_mapping["all"].keys())
250
                + list(cols_mapping[tech].keys())
251
            ),
252
            index_col=None,
253
            dtype={"Postleitzahl": str},
254
        ).rename(columns=cols_mapping)
255
256
        # drop units outside of Germany
257
        len_old = len(units)
258
        units = units.loc[units.Land == "Deutschland"]
259
        print(f"    {len_old-len(units)} units outside of Germany dropped...")
260
261
        # filter for SH units if in testmode
262
        if not TESTMODE_OFF:
263
            print(
264
                """    TESTMODE:
265
                Dropping all units outside of Schleswig-Holstein...
266
                """
267
            )
268
            units = units.loc[units.Bundesland == "SchleswigHolstein"]
269
270
        # merge and rename voltage level
271
        print("  Merging with locations and allocate voltage level...")
272
        units = units.merge(
273
            locations[["MaStRNummer", "Spannungsebene"]],
274
            left_on="LokationMastrNummer",
275
            right_on="MaStRNummer",
276
            how="left",
277
        )
278
        units["voltage_level"] = units.Spannungsebene.replace(vlevel_mapping)
279
280
        # add geometry
281
        print("  Adding geometries...")
282
        units = gpd.GeoDataFrame(
283
            units,
284
            geometry=gpd.points_from_xy(
285
                units["Laengengrad"], units["Breitengrad"], crs=4326
286
            ),
287
            crs=4326,
288
        )
289
        units_wo_geom = len(
290
            units.loc[(units.Laengengrad.isna() | units.Laengengrad.isna())]
291
        )
292
        print(
293
            f"    {units_wo_geom}/{len(units)} units do not have a geometry!"
294
        )
295
296
        # drop unnecessary and rename columns
297
        print("  Reformatting...")
298
        units.drop(
299
            columns=[
300
                "LokationMastrNummer",
301
                "MaStRNummer",
302
                "Laengengrad",
303
                "Breitengrad",
304
                "Spannungsebene",
305
                "Land",
306
            ],
307
            inplace=True,
308
        )
309
        mapping = cols_mapping["all"].copy()
310
        mapping.update(cols_mapping[tech])
311
        mapping.update({"geometry": "geom"})
312
        units.rename(columns=mapping, inplace=True)
313
        units["voltage_level"] = units.voltage_level.fillna(-1).astype(int)
314
        if tech == "hydro":
315
            units["plant_type"] = units.plant_type.fillna(-1).astype(int)
316
        units.set_geometry("geom", inplace=True)
317
        units["id"] = range(0, len(units))
318
319
        # assign bus ids
320
        print("  Assigning bus ids...")
321
        units = (
322
            units.loc[~units.geom.x.isna()]
323
            .sjoin(mv_grid_districts[["bus_id", "geom"]], how="left")
324
            .drop(columns=["index_right"])
325
        )
326
        units["bus_id"] = units.bus_id.fillna(-1).astype(int)
327
328
        # write to DB
329
        print("  Writing to DB...")
330
        units.to_postgis(
331
            name=target_tables[tech].__tablename__,
332
            con=engine,
333
            if_exists="append",
334
            schema=target_tables[tech].__table_args__["schema"],
335
        )
336