Passed
Pull Request — dev (#1362)
by
unknown
01:55
created

data.datasets.demandregio.create_tables()   A

Complexity

Conditions 1

Size

Total Lines 20
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 20
rs 9.7
c 0
b 0
f 0
cc 1
nop 0
1
"""The central module containing all code dealing with importing and
2
adjusting data from demandRegio
3
4
"""
5
6
from pathlib import Path
7
import os
8
import zipfile
9
10
from sqlalchemy import ARRAY, Column, Float, ForeignKey, Integer, String
11
from sqlalchemy.ext.declarative import declarative_base
12
import numpy as np
13
import pandas as pd
14
15
from egon.data import db, logger
16
from egon.data.datasets import Dataset, wrapped_partial
17
from egon.data.datasets.scenario_parameters import (
18
    EgonScenario,
19
    get_sector_parameters,
20
)
21
from egon.data.datasets.zensus import download_and_check
22
import egon.data.config
23
import egon.data.datasets.scenario_parameters.parameters as scenario_parameters
24
25
try:
26
    from disaggregator import config, data, spatial, temporal
27
28
except ImportError as e:
29
    pass
30
31
# will be later imported from another file ###
32
Base = declarative_base()
33
34
35
class DemandRegio(Dataset):
36
    """
37
    Extract and adjust data from DemandRegio
38
39
    Demand data for the sectors households, CTS and industry are calculated
40
    using DemandRegio's diaggregator and input data. To bring the resulting
41
    data in line with other data used in eGon-data and the eGon project in
42
    general some data needed to be adjusted or extended, e.g. in function
43
    :py:func:`adjust_ind_pes` or function :py:func:`adjust_cts_ind_nep`. The
44
    resulting data is written into newly created tables.
45
46
    *Dependencies*
47
      * :py:class:`DataBundle <egon.data.datasets.data_bundle.DataBundle>`
48
      * :py:class:`ScenarioParameters <egon.data.datasets.scenario_parameters.ScenarioParameters>`
49
      * :py:class:`ZensusVg250 <egon.data.datasets.zensus_vg250.ZensusVg250>`
50
51
    *Resulting tables*
52
      * :py:class:`demand.egon_demandregio_hh <egon.data.datasets.demandregio.EgonDemandRegioHH>` is created and filled
53
      * :py:class:`demand.egon_demandregio_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioCtsInd>` is created and filled
54
      * :py:class:`society.egon_demandregio_population <egon.data.datasets.demandregio.EgonDemandRegioPopulation>` is created and filled
55
      * :py:class:`society.egon_demandregio_household <egon.data.datasets.demandregio.EgonDemandRegioHouseholds>` is created and filled
56
      * :py:class:`demand.egon_demandregio_wz <egon.data.datasets.demandregio.EgonDemandRegioWz>` is created and filled
57
      * :py:class:`demand.egon_demandregio_timeseries_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioTimeseriesCtsInd>` is created and filled
58
59
    """
60
61
    #:
62
    name: str = "DemandRegio"
63
    #:
64
    version: str = "0.0.13"
65
66
    def __init__(self, dependencies):
67
        super().__init__(
68
            name=self.name,
69
            version=self.version,
70
            dependencies=dependencies,
71
            tasks=(
72
                get_cached_tables,  # adhoc workaround #180
73
                create_tables,
74
                {
75
                    insert_household_demand,
76
                    insert_society_data,
77
                    insert_cts_ind_demands,
78
                },
79
            ),
80
        )
81
82
83
class DemandRegioLoadProfiles(Base):
84
    __tablename__ = "demandregio_household_load_profiles"
85
    __table_args__ = {"schema": "demand"}
86
87
    id = Column(Integer, primary_key=True)
88
    year = Column(Integer)
89
    nuts3 = Column(String)
90
    load_in_mwh = Column(ARRAY(Float()))
91
92
93
class EgonDemandRegioHH(Base):
94
    __tablename__ = "egon_demandregio_hh"
95
    __table_args__ = {"schema": "demand"}
96
    nuts3 = Column(String(5), primary_key=True)
97
    hh_size = Column(Integer, primary_key=True)
98
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
99
    year = Column(Integer)
100
    demand = Column(Float)
101
102
103
class EgonDemandRegioCtsInd(Base):
104
    __tablename__ = "egon_demandregio_cts_ind"
105
    __table_args__ = {"schema": "demand"}
106
    nuts3 = Column(String(5), primary_key=True)
107
    wz = Column(Integer, primary_key=True)
108
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
109
    year = Column(Integer)
110
    demand = Column(Float)
111
112
113
class EgonDemandRegioPopulation(Base):
114
    __tablename__ = "egon_demandregio_population"
115
    __table_args__ = {"schema": "society"}
116
    nuts3 = Column(String(5), primary_key=True)
117
    year = Column(Integer, primary_key=True)
118
    population = Column(Float)
119
120
121
class EgonDemandRegioHouseholds(Base):
122
    __tablename__ = "egon_demandregio_household"
123
    __table_args__ = {"schema": "society"}
124
    nuts3 = Column(String(5), primary_key=True)
125
    hh_size = Column(Integer, primary_key=True)
126
    year = Column(Integer, primary_key=True)
127
    households = Column(Integer)
128
129
130
class EgonDemandRegioWz(Base):
131
    __tablename__ = "egon_demandregio_wz"
132
    __table_args__ = {"schema": "demand"}
133
    wz = Column(Integer, primary_key=True)
134
    sector = Column(String(50))
135
    definition = Column(String(150))
136
137
138
class EgonDemandRegioTimeseriesCtsInd(Base):
139
    __tablename__ = "egon_demandregio_timeseries_cts_ind"
140
    __table_args__ = {"schema": "demand"}
141
    wz = Column(Integer, primary_key=True)
142
    year = Column(Integer, primary_key=True)
143
    slp = Column(String(50))
144
    load_curve = Column(ARRAY(Float()))
145
146
147
def create_tables():
148
    """Create tables for demandregio data
149
    Returns
150
    -------
151
    None.
152
    """
153
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;")
154
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS society;")
155
    engine = db.engine()
156
    EgonDemandRegioHH.__table__.create(bind=engine, checkfirst=True)
157
    EgonDemandRegioCtsInd.__table__.create(bind=engine, checkfirst=True)
158
    EgonDemandRegioPopulation.__table__.create(bind=engine, checkfirst=True)
159
    EgonDemandRegioHouseholds.__table__.create(bind=engine, checkfirst=True)
160
    EgonDemandRegioWz.__table__.create(bind=engine, checkfirst=True)
161
    DemandRegioLoadProfiles.__table__.create(bind=db.engine(), checkfirst=True)
162
    EgonDemandRegioTimeseriesCtsInd.__table__.drop(
163
        bind=engine, checkfirst=True
164
    )
165
    EgonDemandRegioTimeseriesCtsInd.__table__.create(
166
        bind=engine, checkfirst=True
167
    )
168
169
170
def data_in_boundaries(df):
171
    """Select rows with nuts3 code within boundaries, used for testmode
172
173
    Parameters
174
    ----------
175
    df : pandas.DataFrame
176
        Data for all nuts3 regions
177
178
    Returns
179
    -------
180
    pandas.DataFrame
181
        Data for nuts3 regions within boundaries
182
183
    """
184
    engine = db.engine()
185
186
    df = df.reset_index()
187
188
    # Change nuts3 region names to 2016 version
189
    nuts_names = {"DEB16": "DEB1C", "DEB19": "DEB1D"}
190
    df.loc[df.nuts3.isin(nuts_names), "nuts3"] = df.loc[
191
        df.nuts3.isin(nuts_names), "nuts3"
192
    ].map(nuts_names)
193
194
    df = df.set_index("nuts3")
195
196
    return df[
197
        df.index.isin(
198
            pd.read_sql(
199
                "SELECT DISTINCT ON (nuts) nuts FROM boundaries.vg250_krs",
200
                engine,
201
            ).nuts
202
        )
203
    ]
204
205
206
def insert_cts_ind_wz_definitions():
207
    """Insert demandregio's definitions of CTS and industrial branches
208
209
    Returns
210
    -------
211
    None.
212
213
    """
214
215
    source = egon.data.config.datasets()["demandregio_cts_ind_demand"][
216
        "sources"
217
    ]
218
219
    target = egon.data.config.datasets()["demandregio_cts_ind_demand"][
220
        "targets"
221
    ]["wz_definitions"]
222
223
    engine = db.engine()
224
225
    for sector in source["wz_definitions"]:
226
        file_path = (
227
            Path(".")
228
            / "data_bundle_egon_data"
229
            / "WZ_definition"
230
            / source["wz_definitions"][sector]
231
        )
232
233
        if sector == "CTS":
234
            delimiter = ";"
235
        else:
236
            delimiter = ","
237
        df = (
238
            pd.read_csv(file_path, delimiter=delimiter, header=None)
239
            .rename({0: "wz", 1: "definition"}, axis="columns")
240
            .set_index("wz")
241
        )
242
        df["sector"] = sector
243
        df.to_sql(
244
            target["table"],
245
            engine,
246
            schema=target["schema"],
247
            if_exists="append",
248
        )
249
250
251
def match_nuts3_bl():
252
    """Function that maps the federal state to each nuts3 region
253
254
    Returns
255
    -------
256
    df : pandas.DataFrame
257
        List of nuts3 regions and the federal state of Germany.
258
259
    """
260
261
    engine = db.engine()
262
263
    df = pd.read_sql(
264
        "SELECT DISTINCT ON (boundaries.vg250_krs.nuts) "
265
        "boundaries.vg250_krs.nuts, boundaries.vg250_lan.gen "
266
        "FROM boundaries.vg250_lan, boundaries.vg250_krs "
267
        " WHERE ST_CONTAINS("
268
        "boundaries.vg250_lan.geometry, "
269
        "boundaries.vg250_krs.geometry)",
270
        con=engine,
271
    )
272
273
    df.gen[df.gen == "Baden-Württemberg (Bodensee)"] = "Baden-Württemberg"
274
    df.gen[df.gen == "Bayern (Bodensee)"] = "Bayern"
275
276
    return df.set_index("nuts")
277
278
279
def adjust_ind_pes(ec_cts_ind):
280
    """
281
    Adjust electricity demand of industrial consumers due to electrification
282
    of process heat based on assumptions of pypsa-eur-sec.
283
284
    Parameters
285
    ----------
286
    ec_cts_ind : pandas.DataFrame
287
        Industrial demand without additional electrification
288
289
    Returns
290
    -------
291
    ec_cts_ind : pandas.DataFrame
292
        Industrial demand with additional electrification
293
294
    """
295
296
    pes_path = (
297
        Path(".") / "data_bundle_powerd_data" / "pypsa_eur" / "resources"
298
    )
299
300
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
301
        "sources"
302
    ]["new_consumers_2050"]
303
304
    # Extract today's industrial demand from pypsa-eur-sec
305
    demand_today = pd.read_csv(
306
        pes_path / sources["pes-demand-today"],
307
        header=None,
308
    ).transpose()
309
310
    # Filter data
311
    demand_today[1].fillna("carrier", inplace=True)
312
    demand_today = demand_today[
313
        (demand_today[0] == "DE") | (demand_today[1] == "carrier")
314
    ].drop([0, 2], axis="columns")
315
316
    demand_today = (
317
        demand_today.transpose()
318
        .set_index(0)
319
        .transpose()
320
        .set_index("carrier")
321
        .transpose()
322
        .loc["electricity"]
323
        .astype(float)
324
    )
325
326
    # Calculate future industrial demand from pypsa-eur-sec
327
    # based on production and energy demands per carrier ('sector ratios')
328
    prod_tomorrow = pd.read_csv(pes_path / sources["pes-production-tomorrow"])
329
330
    prod_tomorrow = prod_tomorrow[prod_tomorrow["kton/a"] == "DE"].set_index(
331
        "kton/a"
332
    )
333
334
    sector_ratio = (
335
        pd.read_csv(pes_path / sources["pes-sector-ratios"])
336
        .set_index("MWh/tMaterial")
337
        .loc["elec"]
338
    )
339
340
    demand_tomorrow = prod_tomorrow.multiply(
341
        sector_ratio.div(1000)
342
    ).transpose()["DE"]
343
344
    # Calculate changes of electrical demand per sector in pypsa-eur-sec
345
    change = pd.DataFrame(
346
        (demand_tomorrow / demand_today)
347
        / (demand_tomorrow / demand_today).sum()
348
    )
349
350
    # Drop rows without changes
351
    change = change[~change[0].isnull()]
352
353
    # Map industrial branches of pypsa-eur-sec to WZ2008 used in demandregio
354
    change["wz"] = change.index.map(
355
        {
356
            "Alumina production": 24,
357
            "Aluminium - primary production": 24,
358
            "Aluminium - secondary production": 24,
359
            "Ammonia": 20,
360
            "Basic chemicals (without ammonia)": 20,
361
            "Cement": 23,
362
            "Ceramics & other NMM": 23,
363
            "Electric arc": 24,
364
            "Food, beverages and tobacco": 10,
365
            "Glass production": 23,
366
            "Integrated steelworks": 24,
367
            "Machinery Equipment": 28,
368
            "Other Industrial Sectors": 32,
369
            "Other chemicals": 20,
370
            "Other non-ferrous metals": 24,
371
            "Paper production": 17,
372
            "Pharmaceutical products etc.": 21,
373
            "Printing and media reproduction": 18,
374
            "Pulp production": 17,
375
            "Textiles and leather": 13,
376
            "Transport Equipment": 29,
377
            "Wood and wood products": 16,
378
        }
379
    )
380
381
    # Group by WZ2008
382
    shares_per_wz = change.groupby("wz")[0].sum()
383
384
    # Calculate addtional demands needed to meet future demand of pypsa-eur-sec
385
    addtional_mwh = shares_per_wz.multiply(
386
        demand_tomorrow.sum() * 1000000 - ec_cts_ind.sum().sum()
387
    )
388
389
    # Calulate overall industrial demand for eGon100RE
390
    final_mwh = addtional_mwh + ec_cts_ind[addtional_mwh.index].sum()
391
392
    # Linear scale the industrial demands per nuts3 and wz to meet final demand
393
    ec_cts_ind[addtional_mwh.index] *= (
394
        final_mwh / ec_cts_ind[addtional_mwh.index].sum()
395
    )
396
397
    return ec_cts_ind
398
399
400
def adjust_cts_ind_nep(ec_cts_ind, sector):
401
    """Add electrical demand of new largescale CTS und industrial consumers
402
    according to NEP 2021, scneario C 2035. Values per federal state are
403
    linear distributed over all CTS branches and nuts3 regions.
404
405
    Parameters
406
    ----------
407
    ec_cts_ind : pandas.DataFrame
408
        CTS or industry demand without new largescale consumers.
409
410
    Returns
411
    -------
412
    ec_cts_ind : pandas.DataFrame
413
        CTS or industry demand including new largescale consumers.
414
415
    """
416
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
417
        "sources"
418
    ]
419
420
    file_path = (
421
        Path(".")
422
        / "data_bundle_egon_data"
423
        / "nep2035_version2021"
424
        / sources["new_consumers_2035"]
425
    )
426
427
    # get data from NEP per federal state
428
    new_con = pd.read_csv(file_path, delimiter=";", decimal=",", index_col=0)
429
430
    # match nuts3 regions to federal states
431
    groups = ec_cts_ind.groupby(match_nuts3_bl().gen)
432
433
    # update demands per federal state
434
    for group in groups.indices.keys():
435
        g = groups.get_group(group)
436
        data_new = g.mul(1 + new_con[sector][group] * 1e6 / g.sum().sum())
437
        ec_cts_ind[ec_cts_ind.index.isin(g.index)] = data_new
438
439
    return ec_cts_ind
440
441
442
def disagg_households_power(scenario, year, original=False, **kwargs):
443
    """
444
    Perform spatial disaggregation of electric power in [GWh/a] by key
445
    Similar to disaggregator.spatial.disagg_households_power
446
447
448
    Parameters
449
    ----------
450
    by : str
451
        must be one of ['households', 'population']
452
    orignal : bool, optional
453
        Throughput to function households_per_size,
454
        A flag if the results should be left untouched and returned in
455
        original form for the year 2011 (True) or if they should be scaled to
456
        the given `year` by the population in that year (False).
457
458
    Returns
459
    -------
460
    pd.DataFrame or pd.Series
461
    """
462
    # source: survey of energieAgenturNRW
463
    # with/without direct water heating (DHW), and weighted average
464
    # https://1-stromvergleich.com/wp-content/uploads/erhebung_wo_bleibt_der_strom.pdf
465
    demand_per_hh_size = pd.DataFrame(
466
        index=range(1, 7),
467
        data={
468
            # "weighted DWH": [2290, 3202, 4193, 4955, 5928, 5928],
469
            # "without DHW": [1714, 2812, 3704, 4432, 5317, 5317],
470
            "with_DHW": [2181, 3843, 5151, 6189, 7494, 8465],
471
            "without_DHW": [1798, 2850, 3733, 4480, 5311, 5816],
472
            "weighted": [2256, 3248, 4246, 5009, 5969, 6579],
473
        },
474
    )
475
476
    if scenario == "eGon100RE":
477
        # chose demand per household size from survey without DHW
478
        power_per_HH = demand_per_hh_size["without DHW"] / 1e3
479
480
        # calculate demand per nuts3 in 2011
481
        df_2011 = data.households_per_size(year=2011) * power_per_HH
482
483
        # scale demand per hh-size to meet demand without heat
484
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
485
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
486
487
        # calculate demand per nuts3 in 2050
488
        df = data.households_per_size(year=year) * power_per_HH
489
490
    # Bottom-Up: Power demand by household sizes in [MWh/a] for each scenario
491
    else:
492
        # chose demand per household size from survey including weighted DHW
493
        power_per_HH = demand_per_hh_size["weighted"] / 1e3
494
495
        # calculate demand per nuts3
496
        df = (
497
            data.households_per_size(original=original, year=year)
498
            * power_per_HH
499
        )
500
501
    return df
502
503
504
def write_demandregio_hh_profiles_to_db(hh_profiles):
505
    """Write HH demand profiles from demand regio into db. One row per
506
    year and nuts3. The annual load profile timeseries is an array.
507
508
    schema: demand
509
    tablename: demandregio_household_load_profiles
510
511
512
513
    Parameters
514
    ----------
515
    hh_profiles: pd.DataFrame
516
517
    Returns
518
    -------
519
    """
520
    years = hh_profiles.index.year.unique().values
521
    df_to_db = pd.DataFrame(
522
        columns=["id", "year", "nuts3", "load_in_mwh"]
523
    ).set_index("id")
524
    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]
525
526
    if dataset == "Schleswig-Holstein":
527
        hh_profiles = hh_profiles.loc[
528
            :, hh_profiles.columns.str.contains("DEF0")
529
        ]
530
531
    idx = pd.read_sql_query(
532
        f"""
533
                           SELECT MAX(id)
534
                           FROM {DemandRegioLoadProfiles.__table__.schema}.
535
                           {DemandRegioLoadProfiles.__table__.name}
536
                           """,
537
        con=db.engine(),
538
    ).iat[0, 0]
539
540
    idx = 0 if idx is None else idx + 1
541
542
    for year in years:
543
        df = hh_profiles[hh_profiles.index.year == year]
544
545
        for nuts3 in hh_profiles.columns:
546
            idx += 1
547
            df_to_db.at[idx, "year"] = year
548
            df_to_db.at[idx, "nuts3"] = nuts3
549
            df_to_db.at[idx, "load_in_mwh"] = df[nuts3].to_list()
550
551
    df_to_db["year"] = df_to_db["year"].apply(int)
552
    df_to_db["nuts3"] = df_to_db["nuts3"].astype(str)
553
    df_to_db["load_in_mwh"] = df_to_db["load_in_mwh"].apply(list)
554
    df_to_db = df_to_db.reset_index()
555
556
    df_to_db.to_sql(
557
        name=DemandRegioLoadProfiles.__table__.name,
558
        schema=DemandRegioLoadProfiles.__table__.schema,
559
        con=db.engine(),
560
        if_exists="append",
561
        index=-False,
562
    )
563
564
565
def insert_hh_demand(scenario, year, engine):
566
    """Calculates electrical demands of private households using demandregio's
567
    disaggregator and insert results into the database.
568
569
    Parameters
570
    ----------
571
    scenario : str
572
        Name of the corresponding scenario.
573
    year : int
574
        The number of households per region is taken from this year.
575
576
    Returns
577
    -------
578
    None.
579
580
    """
581
    targets = egon.data.config.datasets()["demandregio_household_demand"][
582
        "targets"
583
    ]["household_demand"]
584
    # get spatial distribution of demands of private households per nuts
585
    # and size from demandregio
586
    ec_hh = disagg_households_power(scenario, year)
587
588
    # Scale to meet target value
589
    # For status2019 and eGon2021 the final demand from demandregio is kept
590
    if scenario not in ["status2019", "eGon2021"]:
591
        ec_hh *= (
592
            get_sector_parameters("electricity", scenario=scenario)[
593
                "annual_demand"
594
            ]["households"]
595
            * 1e6
596
            / ec_hh.sum().sum()
597
        )
598
599
    # Select demands for nuts3-regions in boundaries (needed for testmode)
600
    ec_hh = data_in_boundaries(ec_hh)
601
602
    # insert into database
603
    for hh_size in ec_hh.columns:
604
        df = pd.DataFrame(ec_hh[hh_size])
605
        df["year"] = (
606
            2023 if scenario == "status2023" else year
607
        )  # TODO status2023
608
        # adhoc fix until ffeopendata servers are up and population_year can be set
609
610
        df["scenario"] = scenario
611
        df["hh_size"] = hh_size
612
        df = df.rename({hh_size: "demand"}, axis="columns")
613
        df.to_sql(
614
            targets["table"],
615
            engine,
616
            schema=targets["schema"],
617
            if_exists="append",
618
        )
619
620
    # insert housholds demand timeseries
621
    try:
622
        hh_load_timeseries = (
623
            temporal.disagg_temporal_power_housholds_slp(
624
                use_nuts3code=True,
625
                by="households",
626
                weight_by_income=False,
627
                year=year,
628
            )
629
            .resample("h")
630
            .sum()
631
        )
632
        hh_load_timeseries.rename(
633
            columns={"DEB16": "DEB1C", "DEB19": "DEB1D"}, inplace=True
634
        )
635
    except Exception as e:
636
        logger.warning(
637
            f"Couldnt get profiles from FFE, will use pickeld fallback! \n {e}"
638
        )
639
        hh_load_timeseries = pd.read_csv(
640
            "data_bundle_egon_data/demand_regio_backup/df_load_profiles.csv",
641
            index_col="time",
642
        )
643
        hh_load_timeseries.index = pd.to_datetime(
644
            hh_load_timeseries.index, format="%Y-%m-%d %H:%M:%S"
645
        )
646
647
        def change_year(dt, year):
648
            return dt.replace(year=year)
649
650
        year = 2023 if scenario == "status2023" else year  # TODO status2023
651
        hh_load_timeseries.index = hh_load_timeseries.index.map(
652
            lambda dt: change_year(dt, year)
0 ignored issues
show
introduced by
The variable change_year does not seem to be defined for all execution paths.
Loading history...
653
        )
654
655
        if scenario == "status2023":
656
            hh_load_timeseries = hh_load_timeseries.shift(24 * 2)
657
658
            hh_load_timeseries.iloc[: 24 * 7] = hh_load_timeseries.iloc[
659
                24 * 7 : 24 * 7 * 2
660
            ].values
661
662
    write_demandregio_hh_profiles_to_db(hh_load_timeseries)
663
664
665
def insert_cts_ind(scenario, year, engine, target_values):
666
    """Calculates electrical demands of CTS and industry using demandregio's
667
    disaggregator, adjusts them according to resulting values of NEP 2021 or
668
    JRC IDEES and insert results into the database.
669
670
    Parameters
671
    ----------
672
    scenario : str
673
        Name of the corresponing scenario.
674
    year : int
675
        The number of households per region is taken from this year.
676
    target_values : dict
677
        List of target values for each scenario and sector.
678
679
    Returns
680
    -------
681
    None.
682
683
    """
684
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
685
        "targets"
686
    ]
687
688
    wz_table = pd.read_sql(
689
        "SELECT wz, sector FROM demand.egon_demandregio_wz",
690
        con=engine,
691
        index_col="wz",
692
    )
693
694
    # Workaround: Since the disaggregator does not work anymore, data from
695
    # previous runs is used for eGon2035 and eGon100RE
696
    if scenario == "eGon2035":
697
        file2035_path = (
698
            Path(".")
699
            / "data_bundle_egon_data"
700
            / "demand_regio_backup"
701
            / "egon_demandregio_cts_ind_egon2035.csv"
702
        )
703
        ec_cts_ind2 = pd.read_csv(file2035_path)
704
        ec_cts_ind2.to_sql(
705
            targets["cts_ind_demand"]["table"],
706
            engine,
707
            targets["cts_ind_demand"]["schema"],
708
            if_exists="append",
709
            index=False,
710
        )
711
        return
712
713
    if scenario == "eGon100RE":
714
        ec_cts_ind2 = pd.read_csv(
715
            "data_bundle_egon_data/demand_regio_backup/egon_demandregio_cts_ind.csv"
716
        )
717
        ec_cts_ind2["sector"] = ec_cts_ind2["wz"].map(wz_table["sector"])
718
        factor_ind = target_values["industry"] / (
719
            ec_cts_ind2[ec_cts_ind2["sector"] == "industry"]["demand"].sum()
720
            / 1000
721
        )
722
        factor_cts = target_values["CTS"] / (
723
            ec_cts_ind2[ec_cts_ind2["sector"] == "CTS"]["demand"].sum() / 1000
724
        )
725
726
        ec_cts_ind2["demand"] = ec_cts_ind2.apply(
727
            lambda x: (
728
                x["demand"] * factor_ind
0 ignored issues
show
introduced by
The variable factor_ind does not seem to be defined for all execution paths.
Loading history...
729
                if x["sector"] == "industry"
730
                else x["demand"] * factor_cts
0 ignored issues
show
introduced by
The variable factor_cts does not seem to be defined for all execution paths.
Loading history...
731
            ),
732
            axis=1,
733
        )
734
735
        ec_cts_ind2.drop(columns=["sector"], inplace=True)
736
737
        ec_cts_ind2.to_sql(
738
            targets["cts_ind_demand"]["table"],
739
            engine,
740
            targets["cts_ind_demand"]["schema"],
741
            if_exists="append",
742
            index=False,
743
        )
744
        return
745
746
    for sector in ["CTS", "industry"]:
747
        # get demands per nuts3 and wz of demandregio
748
        ec_cts_ind = spatial.disagg_CTS_industry(
749
            use_nuts3code=True, source="power", sector=sector, year=year
750
        ).transpose()
751
752
        ec_cts_ind.index = ec_cts_ind.index.rename("nuts3")
753
754
        # exclude mobility sector from GHD
755
        ec_cts_ind = ec_cts_ind.drop(columns=49, errors="ignore")
756
757
        # scale values according to target_values
758
        if target_values:
759
            if sector in target_values.keys():
760
                ec_cts_ind *= (
761
                    target_values[sector] * 1e3 / ec_cts_ind.sum().sum()
762
                )
763
        else:
764
            print(
765
                f"No scaling factors for scenario {scenario}."
766
                "Data from demandregio is used without scaling."
767
            )
768
        # include new largescale consumers according to NEP 2021
769
        if scenario == "eGon2035":
770
            ec_cts_ind = adjust_cts_ind_nep(ec_cts_ind, sector)
771
        # include new industrial demands due to sector coupling
772
        if (scenario == "eGon100RE") & (sector == "industry"):
773
            ec_cts_ind = adjust_ind_pes(ec_cts_ind)
774
775
        # Select demands for nuts3-regions in boundaries (needed for testmode)
776
        ec_cts_ind = data_in_boundaries(ec_cts_ind)
777
778
        # insert into database
779
        for wz in ec_cts_ind.columns:
780
            df = pd.DataFrame(ec_cts_ind[wz])
781
            df["year"] = year
782
            df["wz"] = wz
783
            df["scenario"] = scenario
784
            df = df.rename({wz: "demand"}, axis="columns")
785
            df.index = df.index.rename("nuts3")
786
            df.to_sql(
787
                targets["cts_ind_demand"]["table"],
788
                engine,
789
                targets["cts_ind_demand"]["schema"],
790
                if_exists="append",
791
            )
792
793
794
def insert_household_demand():
795
    """Insert electrical demands for households according to
796
    demandregio using its disaggregator-tool in MWh
797
798
    Returns
799
    -------
800
    None.
801
802
    """
803
    targets = egon.data.config.datasets()["demandregio_household_demand"][
804
        "targets"
805
    ]
806
    engine = db.engine()
807
808
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
809
810
    scenarios.append("eGon2021")
811
812
    for t in targets:
813
        db.execute_sql(
814
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
815
        )
816
817
    for scn in scenarios:
818
        year = (
819
            2023
820
            if scn == "status2023"
821
            else scenario_parameters.global_settings(scn)["population_year"]
822
        )
823
824
        # Insert demands of private households
825
        insert_hh_demand(scn, year, engine)
826
827
828
def insert_cts_ind_demands():
829
    """Insert electricity demands per nuts3-region in Germany according to
830
    demandregio using its disaggregator-tool in MWh
831
832
    Returns
833
    -------
834
    None.
835
836
    """
837
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
838
        "targets"
839
    ]
840
    engine = db.engine()
841
842
    for t in targets:
843
        db.execute_sql(
844
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
845
        )
846
847
    insert_cts_ind_wz_definitions()
848
849
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
850
851
    scenarios.append("eGon2021")
852
853
    for scn in scenarios:
854
        year = scenario_parameters.global_settings(scn)["population_year"]
855
856
        if year > 2035:
857
            year = 2035
858
859
        # target values per scenario in MWh
860
        # for eGon2021 and status2019 demandregio-data is used without scaling
861
        if scn not in ["eGon2021", "status2019"]:
862
            target_values = {
863
                "CTS": get_sector_parameters("electricity", scenario=scn)[
864
                    "annual_demand"
865
                ]["CTS"],
866
                "industry": get_sector_parameters("electricity", scenario=scn)[
867
                    "annual_demand"
868
                ]["industry"],
869
            }
870
        else:
871
            target_values = None
872
873
        insert_cts_ind(scn, year, engine, target_values)
874
875
    # Insert load curves per wz
876
    timeseries_per_wz()
877
878
879
def insert_society_data():
880
    """Insert population and number of households per nuts3-region in Germany
881
    according to demandregio using its disaggregator-tool
882
883
    Returns
884
    -------
885
    None.
886
887
    """
888
    targets = egon.data.config.datasets()["demandregio_society"]["targets"]
889
    engine = db.engine()
890
891
    for t in targets:
892
        db.execute_sql(
893
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
894
        )
895
896
    target_years = np.append(
897
        get_sector_parameters("global").population_year.values, 2018
898
    )
899
900
    for year in target_years:
901
        df_pop = pd.DataFrame(data.population(year=year))
902
        df_pop["year"] = year
903
        df_pop = df_pop.rename({"value": "population"}, axis="columns")
904
        # Select data for nuts3-regions in boundaries (needed for testmode)
905
        df_pop = data_in_boundaries(df_pop)
906
        df_pop.to_sql(
907
            targets["population"]["table"],
908
            engine,
909
            schema=targets["population"]["schema"],
910
            if_exists="append",
911
        )
912
913
    for year in target_years:
914
        df_hh = pd.DataFrame(data.households_per_size(year=year))
915
        # Select data for nuts3-regions in boundaries (needed for testmode)
916
        df_hh = data_in_boundaries(df_hh)
917
        for hh_size in df_hh.columns:
918
            df = pd.DataFrame(df_hh[hh_size])
919
            df["year"] = year
920
            df["hh_size"] = hh_size
921
            df = df.rename({hh_size: "households"}, axis="columns")
922
            df.to_sql(
923
                targets["household"]["table"],
924
                engine,
925
                schema=targets["household"]["schema"],
926
                if_exists="append",
927
            )
928
929
930
def insert_timeseries_per_wz(sector, year):
931
    """Insert normalized electrical load time series for the selected sector
932
933
    Parameters
934
    ----------
935
    sector : str
936
        Name of the sector. ['CTS', 'industry']
937
    year : int
938
        Selected weather year
939
940
    Returns
941
    -------
942
    None.
943
944
    """
945
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
946
        "targets"
947
    ]
948
949
    if sector == "CTS":
950
        profiles = (
951
            data.CTS_power_slp_generator("SH", year=year)
952
            .drop(
953
                [
954
                    "Day",
955
                    "Hour",
956
                    "DayOfYear",
957
                    "WD",
958
                    "SA",
959
                    "SU",
960
                    "WIZ",
961
                    "SOZ",
962
                    "UEZ",
963
                ],
964
                axis="columns",
965
            )
966
            .resample("H")
967
            .sum()
968
        )
969
        wz_slp = config.slp_branch_cts_power()
970
    elif sector == "industry":
971
        profiles = (
972
            data.shift_load_profile_generator(state="SH", year=year)
973
            .resample("H")
974
            .sum()
975
        )
976
        wz_slp = config.shift_profile_industry()
977
978
    else:
979
        print(f"Sector {sector} is not valid.")
980
981
    df = pd.DataFrame(
982
        index=wz_slp.keys(), columns=["slp", "load_curve", "year"]
0 ignored issues
show
introduced by
The variable wz_slp does not seem to be defined for all execution paths.
Loading history...
983
    )
984
985
    df.index.rename("wz", inplace=True)
986
987
    df.slp = wz_slp.values()
988
989
    df.year = year
990
991
    df.load_curve = profiles[df.slp].transpose().values.tolist()
0 ignored issues
show
introduced by
The variable profiles does not seem to be defined for all execution paths.
Loading history...
992
993
    db.execute_sql(
994
        f"""
995
                   DELETE FROM {targets['timeseries_cts_ind']['schema']}.
996
                   {targets['timeseries_cts_ind']['table']}
997
                   WHERE wz IN (
998
                       SELECT wz FROM {targets['wz_definitions']['schema']}.
999
                       {targets['wz_definitions']['table']}
1000
                       WHERE sector = '{sector}')
1001
                   """
1002
    )
1003
1004
    df.to_sql(
1005
        targets["timeseries_cts_ind"]["table"],
1006
        schema=targets["timeseries_cts_ind"]["schema"],
1007
        con=db.engine(),
1008
        if_exists="append",
1009
    )
1010
1011
1012
def timeseries_per_wz():
1013
    """Calcultae and insert normalized timeseries per wz for cts and industry
1014
1015
    Returns
1016
    -------
1017
    None.
1018
1019
    """
1020
1021
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
1022
    year_already_in_database = []
1023
    for scn in scenarios:
1024
        year = int(scenario_parameters.global_settings(scn)["weather_year"])
1025
1026
        for sector in ["CTS", "industry"]:
1027
            if not year in year_already_in_database:
1028
                insert_timeseries_per_wz(sector, int(year))
1029
        year_already_in_database.append(year)
1030
1031
1032
def get_cached_tables():
1033
    """Get cached demandregio tables and db-dump from former runs"""
1034
    data_config = egon.data.config.datasets()
1035
    for s in ["cache", "dbdump"]:
1036
        source_path = data_config["demandregio_workaround"]["source"][s][
1037
            "path"
1038
        ]
1039
        target_path = Path(
1040
            ".", data_config["demandregio_workaround"]["targets"][s]["path"]
1041
        )
1042
        os.makedirs(target_path, exist_ok=True)
1043
1044
        with zipfile.ZipFile(source_path, "r") as zip_ref:
1045
            zip_ref.extractall(path=target_path)
1046