Passed
Pull Request — dev (#1375)
by
unknown
02:18
created

data.datasets.power_plants.PowerPlants.__init__()   B

Complexity

Conditions 1

Size

Total Lines 75
Code Lines 65

Duplication

Lines 75
Ratio 100 %

Importance

Changes 0
Metric Value
eloc 65
dl 75
loc 75
rs 8.1454
c 0
b 0
f 0
cc 1
nop 2

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""The central module containing all code dealing with the distribution and
2
allocation of data on conventional and renewable power plants.
3
"""
4
5
from pathlib import Path
6
import logging
7
8
from geoalchemy2 import Geometry
9
from shapely.geometry import Point
10
from sqlalchemy import BigInteger, Column, Float, Integer, Sequence, String
11
from sqlalchemy.dialects.postgresql import JSONB
12
from sqlalchemy.ext.declarative import declarative_base
13
from sqlalchemy.orm import sessionmaker
14
import geopandas as gpd
15
import numpy as np
16
import pandas as pd
17
18
from egon.data import db, logger
19
from egon.data.datasets import Dataset, wrapped_partial
20
from egon.data.datasets.mastr import (
21
    WORKING_DIR_MASTR_NEW,
22
    WORKING_DIR_MASTR_OLD,
23
)
24
from egon.data.datasets.power_plants.conventional import (
25
    match_nep_no_chp,
26
    select_nep_power_plants,
27
    select_no_chp_combustion_mastr,
28
)
29
from egon.data.datasets.power_plants.mastr import (
30
    EgonPowerPlantsBiomass,
31
    EgonPowerPlantsHydro,
32
    EgonPowerPlantsPv,
33
    EgonPowerPlantsWind,
34
    import_mastr,
35
)
36
from egon.data.datasets.power_plants.pv_rooftop import pv_rooftop_per_mv_grid
37
from egon.data.datasets.power_plants.pv_rooftop_buildings import (
38
    pv_rooftop_to_buildings,
39
)
40
import egon.data.config
41
import egon.data.datasets.power_plants.assign_weather_data as assign_weather_data  # noqa: E501
42
import egon.data.datasets.power_plants.metadata as pp_metadata
43
import egon.data.datasets.power_plants.pv_ground_mounted as pv_ground_mounted
44
import egon.data.datasets.power_plants.wind_farms as wind_onshore
45
import egon.data.datasets.power_plants.wind_offshore as wind_offshore
46
47
from egon_validation import(
48
    RowCountValidation,
49
    DataTypeValidation,
50
    NotNullAndNotNaNValidation,
51
    WholeTableNotNullAndNotNaNValidation,
52
    ValueSetValidation,
53
    SRIDUniqueNonZero
54
)
55
56
Base = declarative_base()
57
58
59 View Code Duplication
class EgonPowerPlants(Base):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
60
    __tablename__ = "egon_power_plants"
61
    __table_args__ = {"schema": "supply"}
62
    id = Column(BigInteger, Sequence("pp_seq"), primary_key=True)
63
    sources = Column(JSONB)
64
    source_id = Column(JSONB)
65
    carrier = Column(String)
66
    el_capacity = Column(Float)
67
    bus_id = Column(Integer)
68
    voltage_level = Column(Integer)
69
    weather_cell_id = Column(Integer)
70
    scenario = Column(String)
71
    geom = Column(Geometry("POINT", 4326), index=True)
72
73
74
def create_tables():
75
    """Create tables for power plant data
76
    Returns
77
    -------
78
    None.
79
    """
80
81
    # Tables for future scenarios
82
    cfg = egon.data.config.datasets()["power_plants"]
83
    db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {cfg['target']['schema']};")
84
    engine = db.engine()
85
    db.execute_sql(
86
        f"""DROP TABLE IF EXISTS
87
        {cfg['target']['schema']}.{cfg['target']['table']}"""
88
    )
89
90
    db.execute_sql("""DROP SEQUENCE IF EXISTS pp_seq""")
91
    EgonPowerPlants.__table__.create(bind=engine, checkfirst=True)
92
93
    # Tables for status quo
94
    tables = [
95
        EgonPowerPlantsWind,
96
        EgonPowerPlantsPv,
97
        EgonPowerPlantsBiomass,
98
        EgonPowerPlantsHydro,
99
    ]
100
    for t in tables:
101
        db.execute_sql(
102
            f"""
103
            DROP TABLE IF EXISTS {t.__table_args__['schema']}.
104
            {t.__tablename__} CASCADE;
105
            """
106
        )
107
        t.__table__.create(bind=engine, checkfirst=True)
108
109
110
def scale_prox2now(df, target, level="federal_state"):
111
    """Scale installed capacities linear to status quo power plants
112
113
    Parameters
114
    ----------
115
    df : pandas.DataFrame
116
        Status Quo power plants
117
    target : pandas.Series
118
        Target values for future scenario
119
    level : str, optional
120
        Scale per 'federal_state' or 'country'. The default is 'federal_state'.
121
122
    Returns
123
    -------
124
    df : pandas.DataFrame
125
        Future power plants
126
127
    """
128
    if level == "federal_state":
129
        df.loc[:, "Nettonennleistung"] = (
130
            (
131
                df.groupby(df.Bundesland)
132
                .Nettonennleistung.apply(lambda grp: grp / grp.sum())
133
                .mul(target[df.Bundesland.values].values)
134
            )
135
            .reset_index(level=[0])
136
            .Nettonennleistung
137
        )
138
    else:
139
        df.loc[:, "Nettonennleistung"] = df.Nettonennleistung * (
140
            target / df.Nettonennleistung.sum()
141
        )
142
143
    df = df[df.Nettonennleistung > 0]
144
145
    return df
146
147
148
def select_target(carrier, scenario):
149
    """Select installed capacity per scenario and carrier
150
151
    Parameters
152
    ----------
153
    carrier : str
154
        Name of energy carrier
155
    scenario : str
156
        Name of scenario
157
158
    Returns
159
    -------
160
    pandas.Series
161
        Target values for carrier and scenario
162
163
    """
164
    cfg = egon.data.config.datasets()["power_plants"]
165
166
    return (
167
        pd.read_sql(
168
            f"""SELECT DISTINCT ON (b.gen)
169
                         REPLACE(REPLACE(b.gen, '-', ''), 'ü', 'ue') as state,
170
                         a.capacity
171
                         FROM {cfg['sources']['capacities']} a,
172
                         {cfg['sources']['geom_federal_states']} b
173
                         WHERE a.nuts = b.nuts
174
                         AND scenario_name = '{scenario}'
175
                         AND carrier = '{carrier}'
176
                         AND b.gen NOT IN ('Baden-Württemberg (Bodensee)',
177
                                           'Bayern (Bodensee)')""",
178
            con=db.engine(),
179
        )
180
        .set_index("state")
181
        .capacity
182
    )
183
184
185
def filter_mastr_geometry(mastr, federal_state=None):
186
    """Filter data from MaStR by geometry
187
188
    Parameters
189
    ----------
190
    mastr : pandas.DataFrame
191
        All power plants listed in MaStR
192
    federal_state : str or None
193
        Name of federal state whoes power plants are returned.
194
        If None, data for Germany is returned
195
196
    Returns
197
    -------
198
    mastr_loc : pandas.DataFrame
199
        Power plants listed in MaStR with geometry inside German boundaries
200
201
    """
202
    cfg = egon.data.config.datasets()["power_plants"]
203
204
    if type(mastr) == pd.core.frame.DataFrame:
205
        # Drop entries without geometry for insert
206
        mastr_loc = mastr[
207
            mastr.Laengengrad.notnull() & mastr.Breitengrad.notnull()
208
        ]
209
210
        # Create geodataframe
211
        mastr_loc = gpd.GeoDataFrame(
212
            mastr_loc,
213
            geometry=gpd.points_from_xy(
214
                mastr_loc.Laengengrad, mastr_loc.Breitengrad, crs=4326
215
            ),
216
        )
217
    else:
218
        mastr_loc = mastr.copy()
219
220
    # Drop entries outside of germany or federal state
221
    if not federal_state:
222
        sql = f"SELECT geometry as geom FROM {cfg['sources']['geom_germany']}"
223
    else:
224
        sql = f"""
225
        SELECT geometry as geom
226
        FROM boundaries.vg250_lan_union
227
        WHERE REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') = '{federal_state}'"""
228
229
    mastr_loc = (
230
        gpd.sjoin(
231
            gpd.read_postgis(sql, con=db.engine()).to_crs(4326),
232
            mastr_loc,
233
            how="right",
234
        )
235
        .query("index_left==0")
236
        .drop("index_left", axis=1)
237
    )
238
239
    return mastr_loc
240
241
242
def insert_biomass_plants(scenario):
243
    """Insert biomass power plants of future scenario
244
245
    Parameters
246
    ----------
247
    scenario : str
248
        Name of scenario.
249
250
    Returns
251
    -------
252
    None.
253
254
    """
255
    cfg = egon.data.config.datasets()["power_plants"]
256
257
    # import target values
258
    target = select_target("biomass", scenario)
259
260
    # import data for MaStR
261
    mastr = pd.read_csv(
262
        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_biomass"]
263
    ).query("EinheitBetriebsstatus=='InBetrieb'")
264
265
    # Drop entries without federal state or 'AusschließlichWirtschaftszone'
266
    mastr = mastr[
267
        mastr.Bundesland.isin(
268
            pd.read_sql(
269
                f"""SELECT DISTINCT ON (gen)
270
        REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') as states
271
        FROM {cfg['sources']['geom_federal_states']}""",
272
                con=db.engine(),
273
            ).states.values
274
        )
275
    ]
276
277
    # Scaling will be done per federal state in case of eGon2035 scenario.
278
    if scenario == "eGon2035":
279
        level = "federal_state"
280
    else:
281
        level = "country"
282
283
    # Choose only entries with valid geometries inside DE/test mode
284
    mastr_loc = filter_mastr_geometry(mastr).set_geometry("geometry")
285
286
    # Scale capacities to meet target values
287
    mastr_loc = scale_prox2now(mastr_loc, target, level=level)
288
289
    # Assign bus_id
290
    if len(mastr_loc) > 0:
291
        mastr_loc["voltage_level"] = assign_voltage_level(
292
            mastr_loc, cfg, WORKING_DIR_MASTR_OLD
293
        )
294
        mastr_loc = assign_bus_id(mastr_loc, cfg)
295
296
    # Insert entries with location
297
    session = sessionmaker(bind=db.engine())()
298
299
    for i, row in mastr_loc.iterrows():
300
        if not row.ThermischeNutzleistung > 0:
301
            entry = EgonPowerPlants(
302
                sources={"el_capacity": "MaStR scaled with NEP 2021"},
303
                source_id={"MastrNummer": row.EinheitMastrNummer},
304
                carrier="biomass",
305
                el_capacity=row.Nettonennleistung,
306
                scenario=scenario,
307
                bus_id=row.bus_id,
308
                voltage_level=row.voltage_level,
309
                geom=f"SRID=4326;POINT({row.Laengengrad} {row.Breitengrad})",
310
            )
311
            session.add(entry)
312
313
    session.commit()
314
315
316
def insert_hydro_plants(scenario):
317
    """Insert hydro power plants of future scenario.
318
319
    Hydro power plants are diveded into run_of_river and reservoir plants
320
    according to Marktstammdatenregister.
321
    Additional hydro technologies (e.g. turbines inside drinking water
322
    systems) are not considered.
323
324
    Parameters
325
    ----------
326
    scenario : str
327
        Name of scenario.
328
329
    Returns
330
    -------
331
    None.
332
333
    """
334
    cfg = egon.data.config.datasets()["power_plants"]
335
336
    # Map MaStR carriers to eGon carriers
337
    map_carrier = {
338
        "run_of_river": ["Laufwasseranlage"],
339
        "reservoir": ["Speicherwasseranlage"],
340
    }
341
342
    for carrier in map_carrier.keys():
343
        # import target values
344
        if scenario == "eGon100RE":
345
            try:
346
                target = pd.read_sql(
347
                    f"""SELECT capacity FROM supply.egon_scenario_capacities
348
                            WHERE scenario_name = '{scenario}'
349
                            AND carrier = '{carrier}'
350
                            """,
351
                    con=db.engine(),
352
                ).capacity[0]
353
            except:
354
                logger.info(
355
                    f"No assigned capacity for {carrier} in {scenario}"
356
                )
357
                continue
358
359
        elif scenario == "eGon2035":
360
            target = select_target(carrier, scenario)
361
362
        # import data for MaStR
363
        mastr = pd.read_csv(
364
            WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_hydro"]
365
        ).query("EinheitBetriebsstatus=='InBetrieb'")
366
367
        # Choose only plants with specific carriers
368
        mastr = mastr[mastr.ArtDerWasserkraftanlage.isin(map_carrier[carrier])]
369
370
        # Drop entries without federal state or 'AusschließlichWirtschaftszone'
371
        mastr = mastr[
372
            mastr.Bundesland.isin(
373
                pd.read_sql(
374
                    f"""SELECT DISTINCT ON (gen)
375
            REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') as states
376
            FROM {cfg['sources']['geom_federal_states']}""",
377
                    con=db.engine(),
378
                ).states.values
379
            )
380
        ]
381
382
        # Scaling will be done per federal state in case of eGon2035 scenario.
383
        if scenario == "eGon2035":
384
            level = "federal_state"
385
        else:
386
            level = "country"
387
388
        # Scale capacities to meet target values
389
        mastr = scale_prox2now(mastr, target, level=level)
0 ignored issues
show
introduced by
The variable target does not seem to be defined in case the for loop on line 342 is not entered. Are you sure this can never be the case?
Loading history...
390
391
        # Choose only entries with valid geometries inside DE/test mode
392
        mastr_loc = filter_mastr_geometry(mastr).set_geometry("geometry")
393
        # TODO: Deal with power plants without geometry
394
395
        # Assign bus_id and voltage level
396
        if len(mastr_loc) > 0:
397
            mastr_loc["voltage_level"] = assign_voltage_level(
398
                mastr_loc, cfg, WORKING_DIR_MASTR_NEW
399
            )
400
            mastr_loc = assign_bus_id(mastr_loc, cfg)
401
402
        # Insert entries with location
403
        session = sessionmaker(bind=db.engine())()
404
        for i, row in mastr_loc.iterrows():
405
            entry = EgonPowerPlants(
406
                sources={"el_capacity": "MaStR scaled with NEP 2021"},
407
                source_id={"MastrNummer": row.EinheitMastrNummer},
408
                carrier=carrier,
409
                el_capacity=row.Nettonennleistung,
410
                scenario=scenario,
411
                bus_id=row.bus_id,
412
                voltage_level=row.voltage_level,
413
                geom=f"SRID=4326;POINT({row.Laengengrad} {row.Breitengrad})",
414
            )
415
            session.add(entry)
416
417
        session.commit()
418
419
420
def assign_voltage_level(mastr_loc, cfg, mastr_working_dir):
421
    """Assigns voltage level to power plants.
422
423
    If location data inluding voltage level is available from
424
    Marktstammdatenregister, this is used. Otherwise the voltage level is
425
    assigned according to the electrical capacity.
426
427
    Parameters
428
    ----------
429
    mastr_loc : pandas.DataFrame
430
        Power plants listed in MaStR with geometry inside German boundaries
431
432
    Returns
433
    -------
434
    pandas.DataFrame
435
        Power plants including voltage_level
436
437
    """
438
    mastr_loc["Spannungsebene"] = np.nan
439
    mastr_loc["voltage_level"] = np.nan
440
441
    if "LokationMastrNummer" in mastr_loc.columns:
442
        # Adjust column names to format of MaStR location dataset
443
        if mastr_working_dir == WORKING_DIR_MASTR_OLD:
444
            cols = ["LokationMastrNummer", "Spannungsebene"]
445
        elif mastr_working_dir == WORKING_DIR_MASTR_NEW:
446
            cols = ["MaStRNummer", "Spannungsebene"]
447
        else:
448
            raise ValueError("Invalid MaStR working directory!")
449
450
        location = (
451
            pd.read_csv(
452
                mastr_working_dir / cfg["sources"]["mastr_location"],
453
                usecols=cols,
454
            )
455
            .rename(columns={"MaStRNummer": "LokationMastrNummer"})
456
            .set_index("LokationMastrNummer")
457
        )
458
459
        location = location[~location.index.duplicated(keep="first")]
460
461
        mastr_loc.loc[
462
            mastr_loc[
463
                mastr_loc.LokationMastrNummer.isin(location.index)
464
            ].index,
465
            "Spannungsebene",
466
        ] = location.Spannungsebene[
467
            mastr_loc[
468
                mastr_loc.LokationMastrNummer.isin(location.index)
469
            ].LokationMastrNummer
470
        ].values
471
472
        # Transfer voltage_level as integer from Spanungsebene
473
        map_voltage_levels = pd.Series(
474
            data={
475
                "Höchstspannung": 1,
476
                "Hoechstspannung": 1,
477
                "UmspannungZurHochspannung": 2,
478
                "Hochspannung": 3,
479
                "UmspannungZurMittelspannung": 4,
480
                "Mittelspannung": 5,
481
                "UmspannungZurNiederspannung": 6,
482
                "Niederspannung": 7,
483
            }
484
        )
485
486
        mastr_loc.loc[
487
            mastr_loc[mastr_loc["Spannungsebene"].notnull()].index,
488
            "voltage_level",
489
        ] = map_voltage_levels[
490
            mastr_loc.loc[
491
                mastr_loc[mastr_loc["Spannungsebene"].notnull()].index,
492
                "Spannungsebene",
493
            ].values
494
        ].values
495
496
    else:
497
        print(
498
            "No information about MaStR location available. "
499
            "All voltage levels are assigned using threshold values."
500
        )
501
502
    # If no voltage level is available from mastr, choose level according
503
    # to threshold values
504
505
    mastr_loc.voltage_level = assign_voltage_level_by_capacity(mastr_loc)
506
507
    return mastr_loc.voltage_level
508
509
510
def assign_voltage_level_by_capacity(mastr_loc):
511
512
    for i, row in mastr_loc[mastr_loc.voltage_level.isnull()].iterrows():
513
514
        if row.Nettonennleistung > 120:
515
            level = 1
516
        elif row.Nettonennleistung > 20:
517
            level = 3
518
        elif row.Nettonennleistung > 5.5:
519
            level = 4
520
        elif row.Nettonennleistung > 0.2:
521
            level = 5
522
        elif row.Nettonennleistung > 0.1:
523
            level = 6
524
        else:
525
            level = 7
526
527
        mastr_loc.loc[i, "voltage_level"] = level
528
529
    mastr_loc.voltage_level = mastr_loc.voltage_level.astype(int)
530
531
    return mastr_loc.voltage_level
532
533
534 View Code Duplication
def assign_bus_id(power_plants, cfg, drop_missing=False):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
535
    """Assigns bus_ids to power plants according to location and voltage level
536
537
    Parameters
538
    ----------
539
    power_plants : pandas.DataFrame
540
        Power plants including voltage level
541
542
    Returns
543
    -------
544
    power_plants : pandas.DataFrame
545
        Power plants including voltage level and bus_id
546
547
    """
548
549
    mv_grid_districts = db.select_geodataframe(
550
        f"""
551
        SELECT * FROM {cfg['sources']['egon_mv_grid_district']}
552
        """,
553
        epsg=4326,
554
    )
555
556
    ehv_grid_districts = db.select_geodataframe(
557
        f"""
558
        SELECT * FROM {cfg['sources']['ehv_voronoi']}
559
        """,
560
        epsg=4326,
561
    )
562
563
    # Assign power plants in hv and below to hvmv bus
564
    power_plants_hv = power_plants[power_plants.voltage_level >= 3].index
565
    if len(power_plants_hv) > 0:
566
        power_plants.loc[power_plants_hv, "bus_id"] = gpd.sjoin(
567
            power_plants[power_plants.index.isin(power_plants_hv)],
568
            mv_grid_districts,
569
        ).bus_id
570
571
    # Assign power plants in ehv to ehv bus
572
    power_plants_ehv = power_plants[power_plants.voltage_level < 3].index
573
574
    if len(power_plants_ehv) > 0:
575
        ehv_join = gpd.sjoin(
576
            power_plants[power_plants.index.isin(power_plants_ehv)],
577
            ehv_grid_districts,
578
        )
579
580
        if "bus_id_right" in ehv_join.columns:
581
            power_plants.loc[power_plants_ehv, "bus_id"] = gpd.sjoin(
582
                power_plants[power_plants.index.isin(power_plants_ehv)],
583
                ehv_grid_districts,
584
            ).bus_id_right
585
586
        else:
587
            power_plants.loc[power_plants_ehv, "bus_id"] = gpd.sjoin(
588
                power_plants[power_plants.index.isin(power_plants_ehv)],
589
                ehv_grid_districts,
590
            ).bus_id
591
592
    if drop_missing:
593
        power_plants = power_plants[~power_plants.bus_id.isnull()]
594
595
    # Assert that all power plants have a bus_id
596
    assert power_plants.bus_id.notnull().all(), f"""Some power plants are
597
    not attached to a bus: {power_plants[power_plants.bus_id.isnull()]}"""
598
599
    return power_plants
600
601
602
def insert_hydro_biomass():
603
    """Insert hydro and biomass power plants in database
604
605
    Returns
606
    -------
607
    None.
608
609
    """
610
    cfg = egon.data.config.datasets()["power_plants"]
611
    db.execute_sql(
612
        f"""
613
        DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']}
614
        WHERE carrier IN ('biomass', 'reservoir', 'run_of_river')
615
        AND scenario IN ('eGon2035', 'eGon100RE')
616
        """
617
    )
618
619
    s = egon.data.config.settings()["egon-data"]["--scenarios"]
620
    scenarios = []
621
    if "eGon2035" in s:
622
        scenarios.append("eGon2035")
623
        insert_biomass_plants("eGon2035")
624
    if "eGon100RE" in s:
625
        scenarios.append("eGon100RE")
626
627
    for scenario in scenarios:
628
        insert_hydro_plants(scenario)
629
630
631
def allocate_conventional_non_chp_power_plants():
632
    """Allocate conventional power plants without CHPs based on the NEP target
633
    values and data from power plant registry (MaStR) by assigning them in a
634
    cascaded manner.
635
636
    Returns
637
    -------
638
    None.
639
640
    """
641
    # This function is only designed to work for the eGon2035 scenario
642
    if (
643
        "eGon2035"
644
        not in egon.data.config.settings()["egon-data"]["--scenarios"]
645
    ):
646
        return
647
648
    carrier = ["oil", "gas"]
649
650
    cfg = egon.data.config.datasets()["power_plants"]
651
652
    # Delete existing plants in the target table
653
    db.execute_sql(
654
        f"""
655
         DELETE FROM {cfg ['target']['schema']}.{cfg ['target']['table']}
656
         WHERE carrier IN ('gas', 'oil')
657
         AND scenario='eGon2035';
658
         """
659
    )
660
661
    for carrier in carrier:
662
663
        nep = select_nep_power_plants(carrier)
664
665
        if nep.empty:
666
            print(f"DataFrame from NEP for carrier {carrier} is empty!")
667
668
        else:
669
670
            mastr = select_no_chp_combustion_mastr(carrier)
671
672
            # Assign voltage level to MaStR
673
            mastr["voltage_level"] = assign_voltage_level(
674
                mastr.rename({"el_capacity": "Nettonennleistung"}, axis=1),
675
                cfg,
676
                WORKING_DIR_MASTR_OLD,
677
            )
678
679
            # Initalize DataFrame for matching power plants
680
            matched = gpd.GeoDataFrame(
681
                columns=[
682
                    "carrier",
683
                    "el_capacity",
684
                    "scenario",
685
                    "geometry",
686
                    "MaStRNummer",
687
                    "source",
688
                    "voltage_level",
689
                ]
690
            )
691
692
            # Match combustion plants of a certain carrier from NEP list
693
            # using PLZ and capacity
694
            matched, mastr, nep = match_nep_no_chp(
695
                nep,
696
                mastr,
697
                matched,
698
                buffer_capacity=0.1,
699
                consider_carrier=False,
700
            )
701
702
            # Match plants from NEP list using city and capacity
703
            matched, mastr, nep = match_nep_no_chp(
704
                nep,
705
                mastr,
706
                matched,
707
                buffer_capacity=0.1,
708
                consider_carrier=False,
709
                consider_location="city",
710
            )
711
712
            # Match plants from NEP list using plz,
713
            # neglecting the capacity
714
            matched, mastr, nep = match_nep_no_chp(
715
                nep,
716
                mastr,
717
                matched,
718
                consider_location="plz",
719
                consider_carrier=False,
720
                consider_capacity=False,
721
            )
722
723
            # Match plants from NEP list using city,
724
            # neglecting the capacity
725
            matched, mastr, nep = match_nep_no_chp(
726
                nep,
727
                mastr,
728
                matched,
729
                consider_location="city",
730
                consider_carrier=False,
731
                consider_capacity=False,
732
            )
733
734
            # Match remaining plants from NEP using the federal state
735
            matched, mastr, nep = match_nep_no_chp(
736
                nep,
737
                mastr,
738
                matched,
739
                buffer_capacity=0.1,
740
                consider_location="federal_state",
741
                consider_carrier=False,
742
            )
743
744
            # Match remaining plants from NEP using the federal state
745
            matched, mastr, nep = match_nep_no_chp(
746
                nep,
747
                mastr,
748
                matched,
749
                buffer_capacity=0.7,
750
                consider_location="federal_state",
751
                consider_carrier=False,
752
            )
753
754
            print(f"{matched.el_capacity.sum()} MW of {carrier} matched")
755
            print(f"{nep.c2035_capacity.sum()} MW of {carrier} not matched")
756
757
            matched.crs = "EPSG:4326"
758
759
            # Assign bus_id
760
            # Load grid district polygons
761
            mv_grid_districts = db.select_geodataframe(
762
                f"""
763
            SELECT * FROM {cfg['sources']['egon_mv_grid_district']}
764
            """,
765
                epsg=4326,
766
            )
767
768
            ehv_grid_districts = db.select_geodataframe(
769
                f"""
770
            SELECT * FROM {cfg['sources']['ehv_voronoi']}
771
            """,
772
                epsg=4326,
773
            )
774
775
            # Perform spatial joins for plants in ehv and hv level seperately
776
            power_plants_hv = gpd.sjoin(
777
                matched[matched.voltage_level >= 3],
778
                mv_grid_districts[["bus_id", "geom"]],
779
                how="left",
780
            ).drop(columns=["index_right"])
781
            power_plants_ehv = gpd.sjoin(
782
                matched[matched.voltage_level < 3],
783
                ehv_grid_districts[["bus_id", "geom"]],
784
                how="left",
785
            ).drop(columns=["index_right"])
786
787
            # Combine both dataframes
788
            power_plants = pd.concat([power_plants_hv, power_plants_ehv])
789
790
            # Insert into target table
791
            session = sessionmaker(bind=db.engine())()
792
            for i, row in power_plants.iterrows():
793
                entry = EgonPowerPlants(
794
                    sources={"el_capacity": row.source},
795
                    source_id={"MastrNummer": row.MaStRNummer},
796
                    carrier=row.carrier,
797
                    el_capacity=row.el_capacity,
798
                    voltage_level=row.voltage_level,
799
                    bus_id=row.bus_id,
800
                    scenario=row.scenario,
801
                    geom=f"SRID=4326;POINT({row.geometry.x} {row.geometry.y})",
802
                )
803
                session.add(entry)
804
            session.commit()
805
806
807
def allocate_other_power_plants():
808
    # This function is only designed to work for the eGon2035 scenario
809
    if (
810
        "eGon2035"
811
        not in egon.data.config.settings()["egon-data"]["--scenarios"]
812
    ):
813
        return
814
815
    # Get configuration
816
    cfg = egon.data.config.datasets()["power_plants"]
817
    boundary = egon.data.config.settings()["egon-data"]["--dataset-boundary"]
818
819
    db.execute_sql(
820
        f"""
821
        DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']}
822
        WHERE carrier ='others'
823
        """
824
    )
825
826
    # Define scenario, carrier 'others' is only present in 'eGon2035'
827
    scenario = "eGon2035"
828
829
    # Select target values for carrier 'others'
830
    target = db.select_dataframe(
831
        f"""
832
        SELECT sum(capacity) as capacity, carrier, scenario_name, nuts
833
            FROM {cfg['sources']['capacities']}
834
            WHERE scenario_name = '{scenario}'
835
            AND carrier = 'others'
836
            GROUP BY carrier, nuts, scenario_name;
837
        """
838
    )
839
840
    # Assign name of federal state
841
842
    map_states = {
843
        "DE1": "BadenWuerttemberg",
844
        "DEA": "NordrheinWestfalen",
845
        "DE7": "Hessen",
846
        "DE4": "Brandenburg",
847
        "DE5": "Bremen",
848
        "DEB": "RheinlandPfalz",
849
        "DEE": "SachsenAnhalt",
850
        "DEF": "SchleswigHolstein",
851
        "DE8": "MecklenburgVorpommern",
852
        "DEG": "Thueringen",
853
        "DE9": "Niedersachsen",
854
        "DED": "Sachsen",
855
        "DE6": "Hamburg",
856
        "DEC": "Saarland",
857
        "DE3": "Berlin",
858
        "DE2": "Bayern",
859
    }
860
861
    target = (
862
        target.replace({"nuts": map_states})
863
        .rename(columns={"nuts": "Bundesland"})
864
        .set_index("Bundesland")
865
    )
866
    target = target.capacity
867
868
    # Select 'non chp' power plants from mastr table
869
    mastr_combustion = select_no_chp_combustion_mastr("others")
870
871
    # Rename columns
872
    mastr_combustion = mastr_combustion.rename(
873
        columns={
874
            "carrier": "Energietraeger",
875
            "plz": "Postleitzahl",
876
            "city": "Ort",
877
            "federal_state": "Bundesland",
878
            "el_capacity": "Nettonennleistung",
879
        }
880
    )
881
882
    # Select power plants representing carrier 'others' from MaStR files
883
    mastr_sludge = pd.read_csv(
884
        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_gsgk"]
885
    ).query(
886
        """EinheitBetriebsstatus=='InBetrieb'and Energietraeger=='Klärschlamm'"""  # noqa: E501
887
    )
888
    mastr_geothermal = pd.read_csv(
889
        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_gsgk"]
890
    ).query(
891
        "EinheitBetriebsstatus=='InBetrieb' and Energietraeger=='Geothermie' "
892
        "and Technologie == 'ORCOrganicRankineCycleAnlage'"
893
    )
894
895
    mastr_sg = pd.concat([mastr_sludge, mastr_geothermal])
896
897
    # Insert geometry column
898
    mastr_sg = mastr_sg[~(mastr_sg["Laengengrad"].isnull())]
899
    mastr_sg = gpd.GeoDataFrame(
900
        mastr_sg,
901
        geometry=gpd.points_from_xy(
902
            mastr_sg["Laengengrad"], mastr_sg["Breitengrad"], crs=4326
903
        ),
904
    )
905
906
    # Exclude columns which are not essential
907
    mastr_sg = mastr_sg.filter(
908
        [
909
            "EinheitMastrNummer",
910
            "Nettonennleistung",
911
            "geometry",
912
            "Energietraeger",
913
            "Postleitzahl",
914
            "Ort",
915
            "Bundesland",
916
        ],
917
        axis=1,
918
    )
919
    # Rename carrier
920
    mastr_sg.Energietraeger = "others"
921
922
    # Change data type
923
    mastr_sg["Postleitzahl"] = mastr_sg["Postleitzahl"].astype(int)
924
925
    # Capacity in MW
926
    mastr_sg.loc[:, "Nettonennleistung"] *= 1e-3
927
928
    # Merge different sources to one df
929
    mastr_others = pd.concat([mastr_sg, mastr_combustion]).reset_index()
930
931
    # Delete entries outside Schleswig-Holstein for test mode
932
    if boundary == "Schleswig-Holstein":
933
        mastr_others = mastr_others[
934
            mastr_others["Bundesland"] == "SchleswigHolstein"
935
        ]
936
937
    # Scale capacities prox to now to meet target values
938
    mastr_prox = scale_prox2now(mastr_others, target, level="federal_state")
939
940
    # Assign voltage_level based on scaled capacity
941
    mastr_prox["voltage_level"] = np.nan
942
    mastr_prox["voltage_level"] = assign_voltage_level_by_capacity(mastr_prox)
943
944
    # Rename columns
945
    mastr_prox = mastr_prox.rename(
946
        columns={
947
            "Energietraeger": "carrier",
948
            "Postleitzahl": "plz",
949
            "Ort": "city",
950
            "Bundesland": "federal_state",
951
            "Nettonennleistung": "el_capacity",
952
        }
953
    )
954
955
    # Assign bus_id
956
    mastr_prox = assign_bus_id(mastr_prox, cfg)
957
    mastr_prox = mastr_prox.set_crs(4326, allow_override=True)
958
959
    # Insert into target table
960
    session = sessionmaker(bind=db.engine())()
961
    for i, row in mastr_prox.iterrows():
962
        entry = EgonPowerPlants(
963
            sources=row.el_capacity,
964
            source_id={"MastrNummer": row.EinheitMastrNummer},
965
            carrier=row.carrier,
966
            el_capacity=row.el_capacity,
967
            voltage_level=row.voltage_level,
968
            bus_id=row.bus_id,
969
            scenario=scenario,
970
            geom=f"SRID=4326; {row.geometry}",
971
        )
972
        session.add(entry)
973
    session.commit()
974
975
976
def discard_not_available_generators(gen, max_date):
977
    gen["decommissioning_date"] = pd.to_datetime(gen["decommissioning_date"])
978
    gen["commissioning_date"] = pd.to_datetime(gen["commissioning_date"])
979
    # drop plants that are commissioned after the max date
980
    gen = gen[gen["commissioning_date"] < max_date]
981
982
    # drop decommissioned plants while keeping the ones decommissioned
983
    # after the max date
984
    gen.loc[(gen["decommissioning_date"] > max_date), "status"] = "InBetrieb"
985
986
    gen = gen.loc[
987
        gen["status"].isin(["InBetrieb", "VoruebergehendStillgelegt"])
988
    ]
989
990
    # drop unnecessary columns
991
    gen = gen.drop(columns=["commissioning_date", "decommissioning_date"])
992
993
    return gen
994
995
996
def fill_missing_bus_and_geom(
997
    gens, carrier, geom_municipalities, mv_grid_districts
998
):
999
    # drop generators without data to get geometry.
1000
    drop_id = gens[
1001
        (gens.geom.is_empty) & ~(gens.location.isin(geom_municipalities.index))
1002
    ].index
1003
    new_geom = gens["capacity"][
1004
        (gens.geom.is_empty) & (gens.location.isin(geom_municipalities.index))
1005
    ]
1006
    logger.info(
1007
        f"""{len(drop_id)} {carrier} generator(s) ({int(gens.loc[drop_id, 'capacity']
1008
        .sum())}MW) were drop"""
1009
    )
1010
1011
    logger.info(
1012
        f"""{len(new_geom)} {carrier} generator(s) ({int(new_geom
1013
        .sum())}MW) received a geom based on location
1014
          """
1015
    )
1016
    gens.drop(index=drop_id, inplace=True)
1017
1018
    # assign missing geometries based on location and buses based on geom
1019
1020
    gens["geom"] = gens.apply(
1021
        lambda x: (
1022
            geom_municipalities.at[x["location"], "geom"]
1023
            if x["geom"].is_empty
1024
            else x["geom"]
1025
        ),
1026
        axis=1,
1027
    )
1028
    gens["bus_id"] = gens.sjoin(
1029
        mv_grid_districts[["bus_id", "geom"]], how="left"
1030
    ).bus_id_right.values
1031
1032
    gens = gens.dropna(subset=["bus_id"])
1033
    # convert geom to WKB
1034
    gens["geom"] = gens["geom"].to_wkt()
1035
1036
    return gens
1037
1038
1039
def power_plants_status_quo(scn_name="status2019"):
1040
    def convert_master_info(df):
1041
        # Add further information
1042
        df["sources"] = [{"el_capacity": "MaStR"}] * df.shape[0]
1043
        df["source_id"] = df["gens_id"].apply(lambda x: {"MastrNummer": x})
1044
        return df
1045
1046
    def log_insert_capacity(df, tech):
1047
        logger.info(
1048
            f"""
1049
            {len(df)} {tech} generators with a total installed capacity of
1050
            {int(df["el_capacity"].sum())} MW were inserted into the db
1051
              """
1052
        )
1053
1054
    con = db.engine()
1055
    cfg = egon.data.config.datasets()["power_plants"]
1056
1057
    db.execute_sql(
1058
        f"""
1059
        DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']}
1060
        WHERE carrier IN ('wind_onshore', 'solar', 'biomass',
1061
                          'run_of_river', 'reservoir', 'solar_rooftop',
1062
                          'wind_offshore', 'nuclear', 'coal', 'lignite', 'oil',
1063
                          'gas')
1064
        AND scenario = '{scn_name}'
1065
        """
1066
    )
1067
1068
    # import municipalities to assign missing geom and bus_id
1069
    geom_municipalities = gpd.GeoDataFrame.from_postgis(
1070
        """
1071
        SELECT gen, ST_UNION(geometry) as geom
1072
        FROM boundaries.vg250_gem
1073
        GROUP BY gen
1074
        """,
1075
        con,
1076
        geom_col="geom",
1077
    ).set_index("gen")
1078
    geom_municipalities["geom"] = geom_municipalities["geom"].centroid
1079
1080
    mv_grid_districts = gpd.GeoDataFrame.from_postgis(
1081
        f"""
1082
        SELECT * FROM {cfg['sources']['egon_mv_grid_district']}
1083
        """,
1084
        con,
1085
    )
1086
    mv_grid_districts.geom = mv_grid_districts.geom.to_crs(4326)
1087
1088
    # Conventional non CHP
1089
    #  ###################
1090
    conv = get_conventional_power_plants_non_chp(scn_name)
1091
1092
    conv = fill_missing_bus_and_geom(
1093
        conv, "conventional", geom_municipalities, mv_grid_districts
1094
    )
1095
1096
    conv = conv.rename(columns={"capacity": "el_capacity"})
1097
1098
    # Write into DB
1099
    with db.session_scope() as session:
1100
        session.bulk_insert_mappings(
1101
            EgonPowerPlants,
1102
            conv.to_dict(orient="records"),
1103
        )
1104
1105
    log_insert_capacity(conv, tech="conventional non chp")
1106
1107
    # Hydro Power Plants
1108
    #  ###################
1109
    hydro = gpd.GeoDataFrame.from_postgis(
1110
        f"""SELECT *, city AS location FROM {cfg['sources']['hydro']}
1111
        WHERE plant_type IN ('Laufwasseranlage', 'Speicherwasseranlage')""",
1112
        con,
1113
        geom_col="geom",
1114
    )
1115
1116
    hydro = fill_missing_bus_and_geom(
1117
        hydro, "hydro", geom_municipalities, mv_grid_districts
1118
    )
1119
1120
    hydro = convert_master_info(hydro)
1121
    hydro["carrier"] = hydro["plant_type"].replace(
1122
        to_replace={
1123
            "Laufwasseranlage": "run_of_river",
1124
            "Speicherwasseranlage": "reservoir",
1125
        }
1126
    )
1127
    hydro["scenario"] = scn_name
1128
    hydro = hydro.rename(columns={"capacity": "el_capacity"})
1129
    hydro = hydro.drop(columns="id")
1130
1131
    # Write into DB
1132
    with db.session_scope() as session:
1133
        session.bulk_insert_mappings(
1134
            EgonPowerPlants,
1135
            hydro.to_dict(orient="records"),
1136
        )
1137
1138
    log_insert_capacity(hydro, tech="hydro")
1139
1140
    # Biomass
1141
    #  ###################
1142
    biomass = gpd.GeoDataFrame.from_postgis(
1143
        f"""SELECT *, city AS location FROM {cfg['sources']['biomass']}""",
1144
        con,
1145
        geom_col="geom",
1146
    )
1147
1148
    # drop chp generators
1149
    biomass["th_capacity"] = biomass["th_capacity"].fillna(0)
1150
    biomass = biomass[biomass.th_capacity == 0]
1151
1152
    biomass = fill_missing_bus_and_geom(
1153
        biomass, "biomass", geom_municipalities, mv_grid_districts
1154
    )
1155
1156
    biomass = convert_master_info(biomass)
1157
    biomass["scenario"] = scn_name
1158
    biomass["carrier"] = "biomass"
1159
    biomass = biomass.rename(columns={"capacity": "el_capacity"})
1160
    biomass = biomass.drop(columns="id")
1161
1162
    # Write into DB
1163
    with db.session_scope() as session:
1164
        session.bulk_insert_mappings(
1165
            EgonPowerPlants,
1166
            biomass.to_dict(orient="records"),
1167
        )
1168
1169
    log_insert_capacity(biomass, tech="biomass")
1170
1171
    # Solar
1172
    #  ###################
1173
    solar = gpd.GeoDataFrame.from_postgis(
1174
        f"""SELECT *, city AS location FROM {cfg['sources']['pv']}
1175
        WHERE site_type IN ('Freifläche',
1176
        'Bauliche Anlagen (Hausdach, Gebäude und Fassade)') """,
1177
        con,
1178
        geom_col="geom",
1179
    )
1180
    map_solar = {
1181
        "Freifläche": "solar",
1182
        "Bauliche Anlagen (Hausdach, Gebäude und Fassade)": "solar_rooftop",
1183
    }
1184
    solar["carrier"] = solar["site_type"].replace(to_replace=map_solar)
1185
1186
    solar = fill_missing_bus_and_geom(
1187
        solar, "solar", geom_municipalities, mv_grid_districts
1188
    )
1189
1190
    solar = convert_master_info(solar)
1191
    solar["scenario"] = scn_name
1192
    solar = solar.rename(columns={"capacity": "el_capacity"})
1193
    solar = solar.drop(columns="id")
1194
1195
    # Write into DB
1196
    with db.session_scope() as session:
1197
        session.bulk_insert_mappings(
1198
            EgonPowerPlants,
1199
            solar.to_dict(orient="records"),
1200
        )
1201
1202
    log_insert_capacity(solar, tech="solar")
1203
1204
    # Wind
1205
    #  ###################
1206
    wind_onshore = gpd.GeoDataFrame.from_postgis(
1207
        f"""SELECT *, city AS location FROM {cfg['sources']['wind']}""",
1208
        con,
1209
        geom_col="geom",
1210
    )
1211
1212
    wind_onshore = fill_missing_bus_and_geom(
1213
        wind_onshore, "wind_onshore", geom_municipalities, mv_grid_districts
1214
    )
1215
1216
    wind_onshore = convert_master_info(wind_onshore)
1217
    wind_onshore["scenario"] = scn_name
1218
    wind_onshore = wind_onshore.rename(columns={"capacity": "el_capacity"})
1219
    wind_onshore["carrier"] = "wind_onshore"
1220
    wind_onshore = wind_onshore.drop(columns="id")
1221
1222
    # Write into DB
1223
    with db.session_scope() as session:
1224
        session.bulk_insert_mappings(
1225
            EgonPowerPlants,
1226
            wind_onshore.to_dict(orient="records"),
1227
        )
1228
1229
    log_insert_capacity(wind_onshore, tech="wind_onshore")
1230
1231
1232
def get_conventional_power_plants_non_chp(scn_name):
1233
1234
    cfg = egon.data.config.datasets()["power_plants"]
1235
    # Write conventional power plants in supply.egon_power_plants
1236
    common_columns = [
1237
        "EinheitMastrNummer",
1238
        "Energietraeger",
1239
        "Nettonennleistung",
1240
        "Laengengrad",
1241
        "Breitengrad",
1242
        "Gemeinde",
1243
        "Inbetriebnahmedatum",
1244
        "EinheitBetriebsstatus",
1245
        "DatumEndgueltigeStilllegung",
1246
    ]
1247
    # import nuclear power plants
1248
    nuclear = pd.read_csv(
1249
        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_nuclear"],
1250
        usecols=common_columns,
1251
    )
1252
    # import combustion power plants
1253
    comb = pd.read_csv(
1254
        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_combustion"],
1255
        usecols=common_columns + ["ThermischeNutzleistung"],
1256
    )
1257
1258
    conv = pd.concat([comb, nuclear])
1259
1260
    conv = conv[
1261
        conv.Energietraeger.isin(
1262
            [
1263
                "Braunkohle",
1264
                "Mineralölprodukte",
1265
                "Steinkohle",
1266
                "Kernenergie",
1267
                "Erdgas",
1268
            ]
1269
        )
1270
    ]
1271
1272
    # drop plants that are decommissioned
1273
    conv["DatumEndgueltigeStilllegung"] = pd.to_datetime(
1274
        conv["DatumEndgueltigeStilllegung"]
1275
    )
1276
1277
    # keep plants that were decommissioned after the max date
1278
    conv.loc[
1279
        (
1280
            conv.DatumEndgueltigeStilllegung
1281
            > egon.data.config.datasets()["mastr_new"][f"{scn_name}_date_max"]
1282
        ),
1283
        "EinheitBetriebsstatus",
1284
    ] = "InBetrieb"
1285
1286
    conv = conv.loc[conv.EinheitBetriebsstatus == "InBetrieb"]
1287
1288
    conv = conv.drop(
1289
        columns=["EinheitBetriebsstatus", "DatumEndgueltigeStilllegung"]
1290
    )
1291
1292
    # convert from KW to MW
1293
    conv["Nettonennleistung"] = conv["Nettonennleistung"] / 1000
1294
1295
    # drop generators installed after 2019
1296
    conv["Inbetriebnahmedatum"] = pd.to_datetime(conv["Inbetriebnahmedatum"])
1297
    conv = conv[
1298
        conv["Inbetriebnahmedatum"]
1299
        < egon.data.config.datasets()["mastr_new"][f"{scn_name}_date_max"]
1300
    ]
1301
1302
    conv_cap_chp = (
1303
        conv.groupby("Energietraeger")["Nettonennleistung"].sum() / 1e3
1304
    )
1305
    # drop chp generators
1306
    conv["ThermischeNutzleistung"] = conv["ThermischeNutzleistung"].fillna(0)
1307
    conv = conv[conv.ThermischeNutzleistung == 0]
1308
    conv_cap_no_chp = (
1309
        conv.groupby("Energietraeger")["Nettonennleistung"].sum() / 1e3
1310
    )
1311
1312
    logger.info("Dropped CHP generators in GW")
1313
    logger.info(conv_cap_chp - conv_cap_no_chp)
1314
1315
    # rename carriers
1316
    # rename carriers
1317
    conv["Energietraeger"] = conv["Energietraeger"].replace(
1318
        to_replace={
1319
            "Braunkohle": "lignite",
1320
            "Steinkohle": "coal",
1321
            "Erdgas": "gas",
1322
            "Mineralölprodukte": "oil",
1323
            "Kernenergie": "nuclear",
1324
        }
1325
    )
1326
1327
    # rename columns
1328
    conv.rename(
1329
        columns={
1330
            "EinheitMastrNummer": "gens_id",
1331
            "Energietraeger": "carrier",
1332
            "Nettonennleistung": "capacity",
1333
            "Gemeinde": "location",
1334
        },
1335
        inplace=True,
1336
    )
1337
    conv["bus_id"] = np.nan
1338
    conv["geom"] = gpd.points_from_xy(
1339
        conv.Laengengrad, conv.Breitengrad, crs=4326
1340
    )
1341
    conv.loc[(conv.Laengengrad.isna() | conv.Breitengrad.isna()), "geom"] = (
1342
        Point()
1343
    )
1344
    conv = gpd.GeoDataFrame(conv, geometry="geom")
1345
1346
    # assign voltage level by capacity
1347
    conv["voltage_level"] = np.nan
1348
    conv["voltage_level"] = assign_voltage_level_by_capacity(
1349
        conv.rename(columns={"capacity": "Nettonennleistung"})
1350
    )
1351
    # Add further information
1352
    conv["sources"] = [{"el_capacity": "MaStR"}] * conv.shape[0]
1353
    conv["source_id"] = conv["gens_id"].apply(lambda x: {"MastrNummer": x})
1354
    conv["scenario"] = scn_name
1355
1356
    return conv
1357
1358
1359
def import_gas_gen_egon100():
1360
    scn_name = "eGon100RE"
1361
    if scn_name not in egon.data.config.settings()["egon-data"]["--scenarios"]:
1362
        return
1363
    con = db.engine()
1364
    session = sessionmaker(bind=db.engine())()
1365
    cfg = egon.data.config.datasets()["power_plants"]
1366
    scenario_date_max = "2045-12-31 23:59:00"
1367
1368
    db.execute_sql(
1369
        f"""
1370
        DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']}
1371
        WHERE carrier = 'gas'
1372
        AND bus_id IN (SELECT bus_id from grid.egon_etrago_bus
1373
                WHERE scn_name = '{scn_name}'
1374
                AND country = 'DE')
1375
        AND scenario = '{scn_name}'
1376
        """
1377
    )
1378
1379
    # import municipalities to assign missing geom and bus_id
1380
    geom_municipalities = gpd.GeoDataFrame.from_postgis(
1381
        """
1382
        SELECT gen, ST_UNION(geometry) as geom
1383
        FROM boundaries.vg250_gem
1384
        GROUP BY gen
1385
        """,
1386
        con,
1387
        geom_col="geom",
1388
    ).set_index("gen")
1389
    geom_municipalities["geom"] = geom_municipalities["geom"].centroid
1390
1391
    mv_grid_districts = gpd.GeoDataFrame.from_postgis(
1392
        f"""
1393
        SELECT * FROM {cfg['sources']['egon_mv_grid_district']}
1394
        """,
1395
        con,
1396
    )
1397
    mv_grid_districts.geom = mv_grid_districts.geom.to_crs(4326)
1398
1399
    target = db.select_dataframe(
1400
        f"""
1401
        SELECT capacity FROM supply.egon_scenario_capacities
1402
        WHERE scenario_name = '{scn_name}'
1403
        AND carrier = 'gas'
1404
        """,
1405
    ).iat[0, 0]
1406
1407
    conv = pd.read_csv(
1408
        WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_combustion"],
1409
        usecols=[
1410
            "EinheitMastrNummer",
1411
            "Energietraeger",
1412
            "Nettonennleistung",
1413
            "Laengengrad",
1414
            "Breitengrad",
1415
            "Gemeinde",
1416
            "Inbetriebnahmedatum",
1417
            "EinheitBetriebsstatus",
1418
            "DatumEndgueltigeStilllegung",
1419
            "ThermischeNutzleistung",
1420
        ],
1421
    )
1422
1423
    conv = conv[conv.Energietraeger == "Erdgas"]
1424
1425
    conv.rename(
1426
        columns={
1427
            "Inbetriebnahmedatum": "commissioning_date",
1428
            "EinheitBetriebsstatus": "status",
1429
            "DatumEndgueltigeStilllegung": "decommissioning_date",
1430
            "EinheitMastrNummer": "gens_id",
1431
            "Energietraeger": "carrier",
1432
            "Nettonennleistung": "capacity",
1433
            "Gemeinde": "location",
1434
        },
1435
        inplace=True,
1436
    )
1437
1438
    conv = discard_not_available_generators(conv, scenario_date_max)
1439
1440
    # convert from KW to MW
1441
    conv["capacity"] = conv["capacity"] / 1000
1442
1443
    # drop chp generators
1444
    conv["ThermischeNutzleistung"] = conv["ThermischeNutzleistung"].fillna(0)
1445
    conv = conv[conv.ThermischeNutzleistung == 0]
1446
1447
    # rename carriers
1448
    map_carrier_conv = {"Erdgas": "gas"}
1449
    conv["carrier"] = conv["carrier"].map(map_carrier_conv)
1450
1451
    conv["bus_id"] = np.nan
1452
1453
    conv["geom"] = gpd.points_from_xy(
1454
        conv.Laengengrad, conv.Breitengrad, crs=4326
1455
    )
1456
    conv.loc[(conv.Laengengrad.isna() | conv.Breitengrad.isna()), "geom"] = (
1457
        Point()
1458
    )
1459
    conv = gpd.GeoDataFrame(conv, geometry="geom")
1460
1461
    conv = fill_missing_bus_and_geom(
1462
        conv, "conventional", geom_municipalities, mv_grid_districts
1463
    )
1464
    conv["voltage_level"] = np.nan
1465
1466
    conv["voltage_level"] = assign_voltage_level_by_capacity(
1467
        conv.rename(columns={"capacity": "Nettonennleistung"})
1468
    )
1469
1470
    conv["capacity"] = conv["capacity"] * (target / conv["capacity"].sum())
1471
1472
    max_id = db.select_dataframe(
1473
        """
1474
            SELECT max(id) FROM supply.egon_power_plants
1475
            """,
1476
    ).iat[0, 0]
1477
1478
    conv["id"] = range(max_id + 1, max_id + 1 + len(conv))
1479
1480
    for i, row in conv.iterrows():
1481
        entry = EgonPowerPlants(
1482
            id=row.id,
1483
            sources={"el_capacity": "MaStR"},
1484
            source_id={"MastrNummer": row.gens_id},
1485
            carrier=row.carrier,
1486
            el_capacity=row.capacity,
1487
            scenario=scn_name,
1488
            bus_id=row.bus_id,
1489
            voltage_level=row.voltage_level,
1490
            geom=row.geom,
1491
        )
1492
        session.add(entry)
1493
    session.commit()
1494
1495
    logging.info(
1496
        f"""
1497
          {len(conv)} gas generators with a total installed capacity of
1498
          {conv.capacity.sum()}MW were inserted into the db
1499
          """
1500
    )
1501
1502
    return
1503
1504
1505
tasks = (
1506
    create_tables,
1507
    import_mastr,
1508
)
1509
1510
for scn_name in egon.data.config.settings()["egon-data"]["--scenarios"]:
1511
    if "status" in scn_name:
1512
        tasks += (
1513
            wrapped_partial(
1514
                power_plants_status_quo,
1515
                scn_name=scn_name,
1516
                postfix=f"_{scn_name[-4:]}",
1517
            ),
1518
        )
1519
1520
if (
1521
    "eGon2035" in egon.data.config.settings()["egon-data"]["--scenarios"]
1522
    or "eGon100RE" in egon.data.config.settings()["egon-data"]["--scenarios"]
1523
):
1524
    tasks = tasks + (
1525
        insert_hydro_biomass,
1526
        allocate_conventional_non_chp_power_plants,
1527
        allocate_other_power_plants,
1528
        {
1529
            wind_onshore.insert,
1530
            pv_ground_mounted.insert,
1531
            pv_rooftop_per_mv_grid,
1532
        },
1533
    )
1534
1535
if "eGon100RE" in egon.data.config.settings()["egon-data"]["--scenarios"]:
1536
    tasks = tasks + (import_gas_gen_egon100,)
1537
1538
tasks = tasks + (
1539
    pv_rooftop_to_buildings,
1540
    wind_offshore.insert,
1541
)
1542
1543
for scn_name in egon.data.config.settings()["egon-data"]["--scenarios"]:
1544
    tasks += (
1545
        wrapped_partial(
1546
            assign_weather_data.weatherId_and_busId,
1547
            scn_name=scn_name,
1548
            postfix=f"_{scn_name}",
1549
        ),
1550
    )
1551
1552
tasks += (pp_metadata.metadata,)
1553
1554
1555 View Code Duplication
class PowerPlants(Dataset):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1556
    """
1557
    This dataset deals with the distribution and allocation of power plants
1558
1559
    For the distribution and allocation of power plants to their corresponding
1560
    grid connection point different technology-specific methods are applied.
1561
    In a first step separate tables are created for wind, pv, hydro and biomass
1562
    based power plants by running :py:func:`create_tables`.
1563
    Different methods rely on the locations of existing power plants retrieved
1564
    from the official power plant registry 'Marktstammdatenregister' applying
1565
    function :py:func:`ìmport_mastr`.
1566
1567
    *Hydro and Biomass*
1568
    Hydro and biomass power plants are distributed based on the status quo
1569
    locations of existing power plants assuming that no further expansion of
1570
    these technologies is to be expected in Germany. Hydro power plants include
1571
    reservoir and run-of-river plants.
1572
    Power plants without a correct geolocation are not taken into account.
1573
    To compensate this, the installed capacities of the suitable plants are
1574
    scaled up to meet the target value using function :py:func:`scale_prox2now`
1575
1576
    *Conventional power plants without CHP*
1577
    The distribution of conventional plants, excluding CHPs, takes place in
1578
    function :py:func:`allocate_conventional_non_chp_power_plants`. Therefore
1579
    information about future power plants from the grid development plan
1580
    function as the target value and are matched with actual existing power
1581
    plants with correct geolocations from MaStR registry.
1582
1583
    *Wind onshore*
1584
1585
1586
    *Wind offshore*
1587
1588
    *PV ground-mounted*
1589
1590
    *PV rooftop*
1591
1592
    *others*
1593
1594
    *Dependencies*
1595
      * :py:class:`Chp <egon.data.datasets.chp.Chp>`
1596
      * :py:class:`CtsElectricityDemand
1597
      <egon.data.datasets.electricity_demand.CtsElectricityDemand>`
1598
      * :py:class:`HouseholdElectricityDemand
1599
      <egon.data.datasets.electricity_demand.HouseholdElectricityDemand>`
1600
      * :py:class:`mastr_data <egon.data.datasets.mastr.mastr_data>`
1601
      * :py:func:`define_mv_grid_districts
1602
      <egon.data.datasets.mv_grid_districts.define_mv_grid_districts>`
1603
      * :py:class:`RePotentialAreas
1604
      <egon.data.datasets.re_potential_areas.RePotentialAreas>`
1605
      * :py:class:`ZensusVg250
1606
      <egon.data.datasets.RenewableFeedin>`
1607
      * :py:class:`ScenarioCapacities
1608
      <egon.data.datasets.scenario_capacities.ScenarioCapacities>`
1609
      * :py:class:`ScenarioParameters
1610
      <egon.data.datasets.scenario_parameters.ScenarioParameters>`
1611
      * :py:func:`Setup <egon.data.datasets.database.setup>`
1612
      * :py:class:`substation_extraction
1613
      <egon.data.datasets.substation.substation_extraction>`
1614
      * :py:class:`Vg250MvGridDistricts
1615
      <egon.data.datasets.Vg250MvGridDistricts>`
1616
      * :py:class:`ZensusMvGridDistricts
1617
      <egon.data.datasets.zensus_mv_grid_districts.ZensusMvGridDistricts>`
1618
1619
    *Resulting tables*
1620
      * :py:class:`supply.egon_power_plants
1621
      <egon.data.datasets.power_plants.EgonPowerPlants>` is filled
1622
1623
    """
1624
1625
    #:
1626
    name: str = "PowerPlants"
1627
    #:
1628
    version: str = "0.0.29"
1629
1630
    def __init__(self, dependencies):
1631
        super().__init__(
1632
            name=self.name,
1633
            version=self.version,
1634
            dependencies=dependencies,
1635
            tasks=tasks,
1636
            validation={
1637
                "data-quality": [
1638
                    RowCountValidation(
1639
                        table="supply.egon_power_plants",
1640
                        rule_id="TEST_ROW_COUNT.egon_power_plants",
1641
                        expected_count={"Schleswig-Holstein":34828, "Everything": 1103}
1642
                    ),
1643
                    DataTypeValidation(
1644
                        table="supply.egon_power_plants",
1645
                        rule_id="TEST_DATA_MULTIPLE_TYPES.egon_power_plants",
1646
                        column_types={
1647
                            "id": "bigint",
1648
                            "sources": "jsonb",
1649
                            "source_id": "jsonb",
1650
                            "carrier": "character varying",
1651
                            "el_capacity": "double precision",
1652
                            "bus_id": "integer",
1653
                            "voltage_level": "integer",
1654
                            "weather_cell_id": "integer",
1655
                            "scenario": "character varying",
1656
                            "geom": "geometry"
1657
                        }
1658
                    ),
1659
                    NotNullAndNotNaNValidation(
1660
                        table="supply.egon_power_plants",
1661
                        rule_id="TEST_NOT_NAN.egon_power_plants",
1662
                        columns=["id",
1663
                            "sources",
1664
                            "source_id",
1665
                            "carrier",
1666
                            "el_capacity",
1667
                            "bus_id",
1668
                            "voltage_level",
1669
                            "weather_cell_id",
1670
                            "scenario",
1671
                            "geom"]
1672
                    ),
1673
                    WholeTableNotNullAndNotNaNValidation(
1674
                        table="supply.egon_power_plants",
1675
                        rule_id="TEST_WHOLE_TABLE_NOT_NAN.egon_power_plants"
1676
                    ),
1677
                    ValueSetValidation(
1678
                        table="supply.egon_power_plants",
1679
                        rule_id="VALUE_SET_VALIDATION_CARRIER.egon_power_plants",
1680
                        column="carrier",
1681
                        expected_values=["others",
1682
                            "gas",
1683
                            "biomass",
1684
                            "run_of_river",
1685
                            "wind_onshore",
1686
                            "oil",
1687
                            "wind_offshore",
1688
                            "solar",
1689
                            "reservoir"]
1690
                    ),
1691
                    ValueSetValidation(
1692
                        table="supply.egon_power_plants",
1693
                        rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_power_plants",
1694
                        column="scenario",
1695
                        expected_values=["eGon2035", "eGon100RE"]
1696
                    ),
1697
                    SRIDUniqueNonZero(
1698
                        table="supply.egon_power_plants",
1699
                        rule_id="SRIDUniqueNonZero.egon_power_plants.geom",
1700
                        column="geom"
1701
                    ),
1702
                ]
1703
            },
1704
            on_validation_failure="continue"
1705
        )
1706