Passed
Pull Request — dev (#1362)
by
unknown
02:01
created

data.datasets.demandregio.insert_cts_ind_demands()   B

Complexity

Conditions 5

Size

Total Lines 49
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 49
rs 8.8133
c 0
b 0
f 0
cc 5
nop 0
1
"""The central module containing all code dealing with importing and
2
adjusting data from demandRegio
3
4
"""
5
6
from pathlib import Path
7
import os
8
import zipfile
9
10
from sqlalchemy import ARRAY, Column, Float, ForeignKey, Integer, String
11
from sqlalchemy.ext.declarative import declarative_base
12
import numpy as np
13
import pandas as pd
14
15
from egon.data import db, logger
16
from egon.data.datasets import Dataset, wrapped_partial
17
from egon.data.datasets.scenario_parameters import (
18
    EgonScenario,
19
    get_sector_parameters,
20
)
21
from egon.data.datasets.zensus import download_and_check
22
import egon.data.config
23
import egon.data.datasets.scenario_parameters.parameters as scenario_parameters
24
25
try:
26
    from disaggregator import config, data, spatial, temporal
27
28
except ImportError as e:
29
    pass
30
31
# will be later imported from another file ###
32
Base = declarative_base()
33
34
35
class DemandRegio(Dataset):
36
    """
37
    Extract and adjust data from DemandRegio
38
39
    Demand data for the sectors households, CTS and industry are calculated
40
    using DemandRegio's diaggregator and input data. To bring the resulting
41
    data in line with other data used in eGon-data and the eGon project in
42
    general some data needed to be adjusted or extended, e.g. in function
43
    :py:func:`adjust_ind_pes` or function :py:func:`adjust_cts_ind_nep`. The
44
    resulting data is written into newly created tables.
45
46
    *Dependencies*
47
      * :py:class:`DataBundle <egon.data.datasets.data_bundle.DataBundle>`
48
      * :py:class:`ScenarioParameters <egon.data.datasets.scenario_parameters.ScenarioParameters>`
49
      * :py:class:`ZensusVg250 <egon.data.datasets.zensus_vg250.ZensusVg250>`
50
51
    *Resulting tables*
52
      * :py:class:`demand.egon_demandregio_hh <egon.data.datasets.demandregio.EgonDemandRegioHH>` is created and filled
53
      * :py:class:`demand.egon_demandregio_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioCtsInd>` is created and filled
54
      * :py:class:`society.egon_demandregio_population <egon.data.datasets.demandregio.EgonDemandRegioPopulation>` is created and filled
55
      * :py:class:`society.egon_demandregio_household <egon.data.datasets.demandregio.EgonDemandRegioHouseholds>` is created and filled
56
      * :py:class:`demand.egon_demandregio_wz <egon.data.datasets.demandregio.EgonDemandRegioWz>` is created and filled
57
      * :py:class:`demand.egon_demandregio_timeseries_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioTimeseriesCtsInd>` is created and filled
58
59
    """
60
61
    #:
62
    name: str = "DemandRegio"
63
    #:
64
    version: str = "0.0.13"
65
66
    def __init__(self, dependencies):
67
        super().__init__(
68
            name=self.name,
69
            version=self.version,
70
            dependencies=dependencies,
71
            tasks=(
72
                get_cached_tables,  # adhoc workaround #180
73
                create_tables,
74
                {
75
                    insert_household_demand,
76
                    insert_society_data,
77
                    insert_cts_ind_demands,
78
                },
79
            ),
80
        )
81
82
83
class DemandRegioLoadProfiles(Base):
84
    __tablename__ = "demandregio_household_load_profiles"
85
    __table_args__ = {"schema": "demand"}
86
87
    id = Column(Integer, primary_key=True)
88
    year = Column(Integer)
89
    nuts3 = Column(String)
90
    load_in_mwh = Column(ARRAY(Float()))
91
92
93
class EgonDemandRegioHH(Base):
94
    __tablename__ = "egon_demandregio_hh"
95
    __table_args__ = {"schema": "demand"}
96
    nuts3 = Column(String(5), primary_key=True)
97
    hh_size = Column(Integer, primary_key=True)
98
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
99
    year = Column(Integer)
100
    demand = Column(Float)
101
102
103
class EgonDemandRegioCtsInd(Base):
104
    __tablename__ = "egon_demandregio_cts_ind"
105
    __table_args__ = {"schema": "demand"}
106
    nuts3 = Column(String(5), primary_key=True)
107
    wz = Column(Integer, primary_key=True)
108
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
109
    year = Column(Integer)
110
    demand = Column(Float)
111
112
113
class EgonDemandRegioPopulation(Base):
114
    __tablename__ = "egon_demandregio_population"
115
    __table_args__ = {"schema": "society"}
116
    nuts3 = Column(String(5), primary_key=True)
117
    year = Column(Integer, primary_key=True)
118
    population = Column(Float)
119
120
121
class EgonDemandRegioHouseholds(Base):
122
    __tablename__ = "egon_demandregio_household"
123
    __table_args__ = {"schema": "society"}
124
    nuts3 = Column(String(5), primary_key=True)
125
    hh_size = Column(Integer, primary_key=True)
126
    year = Column(Integer, primary_key=True)
127
    households = Column(Integer)
128
129
130
class EgonDemandRegioWz(Base):
131
    __tablename__ = "egon_demandregio_wz"
132
    __table_args__ = {"schema": "demand"}
133
    wz = Column(Integer, primary_key=True)
134
    sector = Column(String(50))
135
    definition = Column(String(150))
136
137
138
class EgonDemandRegioTimeseriesCtsInd(Base):
139
    __tablename__ = "egon_demandregio_timeseries_cts_ind"
140
    __table_args__ = {"schema": "demand"}
141
    wz = Column(Integer, primary_key=True)
142
    year = Column(Integer, primary_key=True)
143
    slp = Column(String(50))
144
    load_curve = Column(ARRAY(Float()))
145
146
147
def create_tables():
148
    """Create tables for demandregio data
149
    Returns
150
    -------
151
    None.
152
    """
153
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;")
154
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS society;")
155
    engine = db.engine()
156
    EgonDemandRegioHH.__table__.create(bind=engine, checkfirst=True)
157
    EgonDemandRegioCtsInd.__table__.create(bind=engine, checkfirst=True)
158
    EgonDemandRegioPopulation.__table__.create(bind=engine, checkfirst=True)
159
    EgonDemandRegioHouseholds.__table__.create(bind=engine, checkfirst=True)
160
    EgonDemandRegioWz.__table__.create(bind=engine, checkfirst=True)
161
    DemandRegioLoadProfiles.__table__.create(bind=db.engine(), checkfirst=True)
162
    EgonDemandRegioTimeseriesCtsInd.__table__.drop(
163
        bind=engine, checkfirst=True
164
    )
165
    EgonDemandRegioTimeseriesCtsInd.__table__.create(
166
        bind=engine, checkfirst=True
167
    )
168
169
170
def data_in_boundaries(df):
171
    """Select rows with nuts3 code within boundaries, used for testmode
172
173
    Parameters
174
    ----------
175
    df : pandas.DataFrame
176
        Data for all nuts3 regions
177
178
    Returns
179
    -------
180
    pandas.DataFrame
181
        Data for nuts3 regions within boundaries
182
183
    """
184
    engine = db.engine()
185
186
    df = df.reset_index()
187
188
    # Change nuts3 region names to 2016 version
189
    nuts_names = {"DEB16": "DEB1C", "DEB19": "DEB1D"}
190
    df.loc[df.nuts3.isin(nuts_names), "nuts3"] = df.loc[
191
        df.nuts3.isin(nuts_names), "nuts3"
192
    ].map(nuts_names)
193
194
    df = df.set_index("nuts3")
195
196
    return df[
197
        df.index.isin(
198
            pd.read_sql(
199
                "SELECT DISTINCT ON (nuts) nuts FROM boundaries.vg250_krs",
200
                engine,
201
            ).nuts
202
        )
203
    ]
204
205
206
def insert_cts_ind_wz_definitions():
207
    """Insert demandregio's definitions of CTS and industrial branches
208
209
    Returns
210
    -------
211
    None.
212
213
    """
214
215
    source = egon.data.config.datasets()["demandregio_cts_ind_demand"][
216
        "sources"
217
    ]
218
219
    target = egon.data.config.datasets()["demandregio_cts_ind_demand"][
220
        "targets"
221
    ]["wz_definitions"]
222
223
    engine = db.engine()
224
225
    for sector in source["wz_definitions"]:
226
        file_path = (
227
            Path(".")
228
            / "data_bundle_egon_data"
229
            / "WZ_definition"
230
            / source["wz_definitions"][sector]
231
        )
232
233
        if sector == "CTS":
234
            delimiter = ";"
235
        else:
236
            delimiter = ","
237
        df = (
238
            pd.read_csv(file_path, delimiter=delimiter, header=None)
239
            .rename({0: "wz", 1: "definition"}, axis="columns")
240
            .set_index("wz")
241
        )
242
        df["sector"] = sector
243
        df.to_sql(
244
            target["table"],
245
            engine,
246
            schema=target["schema"],
247
            if_exists="append",
248
        )
249
250
251
def match_nuts3_bl():
252
    """Function that maps the federal state to each nuts3 region
253
254
    Returns
255
    -------
256
    df : pandas.DataFrame
257
        List of nuts3 regions and the federal state of Germany.
258
259
    """
260
261
    engine = db.engine()
262
263
    df = pd.read_sql(
264
        "SELECT DISTINCT ON (boundaries.vg250_krs.nuts) "
265
        "boundaries.vg250_krs.nuts, boundaries.vg250_lan.gen "
266
        "FROM boundaries.vg250_lan, boundaries.vg250_krs "
267
        " WHERE ST_CONTAINS("
268
        "boundaries.vg250_lan.geometry, "
269
        "boundaries.vg250_krs.geometry)",
270
        con=engine,
271
    )
272
273
    df.gen[df.gen == "Baden-Württemberg (Bodensee)"] = "Baden-Württemberg"
274
    df.gen[df.gen == "Bayern (Bodensee)"] = "Bayern"
275
276
    return df.set_index("nuts")
277
278
279
def adjust_ind_pes(ec_cts_ind):
280
    """
281
    Adjust electricity demand of industrial consumers due to electrification
282
    of process heat based on assumptions of pypsa-eur-sec.
283
284
    Parameters
285
    ----------
286
    ec_cts_ind : pandas.DataFrame
287
        Industrial demand without additional electrification
288
289
    Returns
290
    -------
291
    ec_cts_ind : pandas.DataFrame
292
        Industrial demand with additional electrification
293
294
    """
295
296
    pes_path = (
297
        Path(".") / "data_bundle_powerd_data" / "pypsa_eur" / "resources"
298
    )
299
300
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
301
        "sources"
302
    ]["new_consumers_2050"]
303
304
    # Extract today's industrial demand from pypsa-eur-sec
305
    demand_today = pd.read_csv(
306
        pes_path / sources["pes-demand-today"],
307
        header=None,
308
    ).transpose()
309
310
    # Filter data
311
    demand_today[1].fillna("carrier", inplace=True)
312
    demand_today = demand_today[
313
        (demand_today[0] == "DE") | (demand_today[1] == "carrier")
314
    ].drop([0, 2], axis="columns")
315
316
    demand_today = (
317
        demand_today.transpose()
318
        .set_index(0)
319
        .transpose()
320
        .set_index("carrier")
321
        .transpose()
322
        .loc["electricity"]
323
        .astype(float)
324
    )
325
326
    # Calculate future industrial demand from pypsa-eur-sec
327
    # based on production and energy demands per carrier ('sector ratios')
328
    prod_tomorrow = pd.read_csv(pes_path / sources["pes-production-tomorrow"])
329
330
    prod_tomorrow = prod_tomorrow[prod_tomorrow["kton/a"] == "DE"].set_index(
331
        "kton/a"
332
    )
333
334
    sector_ratio = (
335
        pd.read_csv(pes_path / sources["pes-sector-ratios"])
336
        .set_index("MWh/tMaterial")
337
        .loc["elec"]
338
    )
339
340
    demand_tomorrow = prod_tomorrow.multiply(
341
        sector_ratio.div(1000)
342
    ).transpose()["DE"]
343
344
    # Calculate changes of electrical demand per sector in pypsa-eur-sec
345
    change = pd.DataFrame(
346
        (demand_tomorrow / demand_today)
347
        / (demand_tomorrow / demand_today).sum()
348
    )
349
350
    # Drop rows without changes
351
    change = change[~change[0].isnull()]
352
353
    # Map industrial branches of pypsa-eur-sec to WZ2008 used in demandregio
354
    change["wz"] = change.index.map(
355
        {
356
            "Alumina production": 24,
357
            "Aluminium - primary production": 24,
358
            "Aluminium - secondary production": 24,
359
            "Ammonia": 20,
360
            "Basic chemicals (without ammonia)": 20,
361
            "Cement": 23,
362
            "Ceramics & other NMM": 23,
363
            "Electric arc": 24,
364
            "Food, beverages and tobacco": 10,
365
            "Glass production": 23,
366
            "Integrated steelworks": 24,
367
            "Machinery Equipment": 28,
368
            "Other Industrial Sectors": 32,
369
            "Other chemicals": 20,
370
            "Other non-ferrous metals": 24,
371
            "Paper production": 17,
372
            "Pharmaceutical products etc.": 21,
373
            "Printing and media reproduction": 18,
374
            "Pulp production": 17,
375
            "Textiles and leather": 13,
376
            "Transport Equipment": 29,
377
            "Wood and wood products": 16,
378
        }
379
    )
380
381
    # Group by WZ2008
382
    shares_per_wz = change.groupby("wz")[0].sum()
383
384
    # Calculate addtional demands needed to meet future demand of pypsa-eur-sec
385
    addtional_mwh = shares_per_wz.multiply(
386
        demand_tomorrow.sum() * 1000000 - ec_cts_ind.sum().sum()
387
    )
388
389
    # Calulate overall industrial demand for eGon100RE
390
    final_mwh = addtional_mwh + ec_cts_ind[addtional_mwh.index].sum()
391
392
    # Linear scale the industrial demands per nuts3 and wz to meet final demand
393
    ec_cts_ind[addtional_mwh.index] *= (
394
        final_mwh / ec_cts_ind[addtional_mwh.index].sum()
395
    )
396
397
    return ec_cts_ind
398
399
400
def adjust_cts_ind_nep(ec_cts_ind, sector):
401
    """Add electrical demand of new largescale CTS und industrial consumers
402
    according to NEP 2021, scneario C 2035. Values per federal state are
403
    linear distributed over all CTS branches and nuts3 regions.
404
405
    Parameters
406
    ----------
407
    ec_cts_ind : pandas.DataFrame
408
        CTS or industry demand without new largescale consumers.
409
410
    Returns
411
    -------
412
    ec_cts_ind : pandas.DataFrame
413
        CTS or industry demand including new largescale consumers.
414
415
    """
416
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
417
        "sources"
418
    ]
419
420
    file_path = (
421
        Path(".")
422
        / "data_bundle_egon_data"
423
        / "nep2035_version2021"
424
        / sources["new_consumers_2035"]
425
    )
426
427
    # get data from NEP per federal state
428
    new_con = pd.read_csv(file_path, delimiter=";", decimal=",", index_col=0)
429
430
    # match nuts3 regions to federal states
431
    groups = ec_cts_ind.groupby(match_nuts3_bl().gen)
432
433
    # update demands per federal state
434
    for group in groups.indices.keys():
435
        g = groups.get_group(group)
436
        data_new = g.mul(1 + new_con[sector][group] * 1e6 / g.sum().sum())
437
        ec_cts_ind[ec_cts_ind.index.isin(g.index)] = data_new
438
439
    return ec_cts_ind
440
441
442
def disagg_households_power(scenario, year, original=False, **kwargs):
443
    """
444
    Perform spatial disaggregation of electric power in [GWh/a] by key
445
    Similar to disaggregator.spatial.disagg_households_power
446
447
448
    Parameters
449
    ----------
450
    by : str
451
        must be one of ['households', 'population']
452
    orignal : bool, optional
453
        Throughput to function households_per_size,
454
        A flag if the results should be left untouched and returned in
455
        original form for the year 2011 (True) or if they should be scaled to
456
        the given `year` by the population in that year (False).
457
458
    Returns
459
    -------
460
    pd.DataFrame or pd.Series
461
    """
462
    # source: survey of energieAgenturNRW
463
    # with/without direct water heating (DHW), and weighted average
464
    # https://1-stromvergleich.com/wp-content/uploads/erhebung_wo_bleibt_der_strom.pdf
465
    demand_per_hh_size = pd.DataFrame(
466
        index=range(1, 7),
467
        data={
468
            # "weighted DWH": [2290, 3202, 4193, 4955, 5928, 5928],
469
            # "without DHW": [1714, 2812, 3704, 4432, 5317, 5317],
470
            "with_DHW": [2181, 3843, 5151, 6189, 7494, 8465],
471
            "without_DHW": [1798, 2850, 3733, 4480, 5311, 5816],
472
            "weighted": [2256, 3248, 4246, 5009, 5969, 6579],
473
        },
474
    )
475
476
    if scenario == "eGon100RE":
477
        # chose demand per household size from survey without DHW
478
        power_per_HH = demand_per_hh_size["without DHW"] / 1e3
479
480
        # calculate demand per nuts3 in 2011
481
        df_2011 = data.households_per_size(year=2011) * power_per_HH
482
483
        # scale demand per hh-size to meet demand without heat
484
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
485
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
486
487
        # calculate demand per nuts3 in 2050
488
        df = data.households_per_size(year=year) * power_per_HH
489
490
    # Bottom-Up: Power demand by household sizes in [MWh/a] for each scenario
491
    else:
492
        # chose demand per household size from survey including weighted DHW
493
        power_per_HH = demand_per_hh_size["weighted"] / 1e3
494
495
        # calculate demand per nuts3
496
        df = (
497
            data.households_per_size(original=original, year=year)
498
            * power_per_HH
499
        )
500
501
    return df
502
503
504
def write_demandregio_hh_profiles_to_db(hh_profiles):
505
    """Write HH demand profiles from demand regio into db. One row per
506
    year and nuts3. The annual load profile timeseries is an array.
507
508
    schema: demand
509
    tablename: demandregio_household_load_profiles
510
511
512
513
    Parameters
514
    ----------
515
    hh_profiles: pd.DataFrame
516
517
    Returns
518
    -------
519
    """
520
    years = hh_profiles.index.year.unique().values
521
    df_to_db = pd.DataFrame(
522
        columns=["id", "year", "nuts3", "load_in_mwh"]
523
    ).set_index("id")
524
    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]
525
526
    if dataset == "Schleswig-Holstein":
527
        hh_profiles = hh_profiles.loc[
528
            :, hh_profiles.columns.str.contains("DEF0")
529
        ]
530
531
    idx = pd.read_sql_query(
532
        f"""
533
                           SELECT MAX(id)
534
                           FROM {DemandRegioLoadProfiles.__table__.schema}.
535
                           {DemandRegioLoadProfiles.__table__.name}
536
                           """,
537
        con=db.engine(),
538
    ).iat[0, 0]
539
540
    idx = 0 if idx is None else idx + 1
541
542
    for year in years:
543
        df = hh_profiles[hh_profiles.index.year == year]
544
545
        for nuts3 in hh_profiles.columns:
546
            idx += 1
547
            df_to_db.at[idx, "year"] = year
548
            df_to_db.at[idx, "nuts3"] = nuts3
549
            df_to_db.at[idx, "load_in_mwh"] = df[nuts3].to_list()
550
551
    df_to_db["year"] = df_to_db["year"].apply(int)
552
    df_to_db["nuts3"] = df_to_db["nuts3"].astype(str)
553
    df_to_db["load_in_mwh"] = df_to_db["load_in_mwh"].apply(list)
554
    df_to_db = df_to_db.reset_index()
555
556
    df_to_db.to_sql(
557
        name=DemandRegioLoadProfiles.__table__.name,
558
        schema=DemandRegioLoadProfiles.__table__.schema,
559
        con=db.engine(),
560
        if_exists="append",
561
        index=-False,
562
    )
563
564
565
def insert_hh_demand(scenario, year, engine):
566
    """Calculates electrical demands of private households using demandregio's
567
    disaggregator and insert results into the database.
568
569
    Parameters
570
    ----------
571
    scenario : str
572
        Name of the corresponding scenario.
573
    year : int
574
        The number of households per region is taken from this year.
575
576
    Returns
577
    -------
578
    None.
579
580
    """
581
    targets = egon.data.config.datasets()["demandregio_household_demand"][
582
        "targets"
583
    ]["household_demand"]
584
    # get spatial distribution of demands of private households per nuts
585
    # and size from demandregio
586
    ec_hh = disagg_households_power(scenario, year)
587
588
    # Scale to meet target value
589
    # For status2019 and eGon2021 the final demand from demandregio is kept
590
    if scenario not in ["status2019", "eGon2021"]:
591
        ec_hh *= (
592
            get_sector_parameters("electricity", scenario=scenario)[
593
                "annual_demand"
594
            ]["households"]
595
            * 1e6
596
            / ec_hh.sum().sum()
597
        )
598
599
    # Select demands for nuts3-regions in boundaries (needed for testmode)
600
    ec_hh = data_in_boundaries(ec_hh)
601
602
    # insert into database
603
    for hh_size in ec_hh.columns:
604
        df = pd.DataFrame(ec_hh[hh_size])
605
        df["year"] = (
606
            2023 if scenario == "status2023" else year
607
        )  # TODO status2023
608
        # adhoc fix until ffeopendata servers are up and population_year can be set
609
610
        df["scenario"] = scenario
611
        df["hh_size"] = hh_size
612
        df = df.rename({hh_size: "demand"}, axis="columns")
613
        df.to_sql(
614
            targets["table"],
615
            engine,
616
            schema=targets["schema"],
617
            if_exists="append",
618
        )
619
620
    # insert housholds demand timeseries
621
    try:
622
        hh_load_timeseries = (
623
            temporal.disagg_temporal_power_housholds_slp(
624
                use_nuts3code=True,
625
                by="households",
626
                weight_by_income=False,
627
                year=year,
628
            )
629
            .resample("h")
630
            .sum()
631
        )
632
        hh_load_timeseries.rename(
633
            columns={"DEB16": "DEB1C", "DEB19": "DEB1D"}, inplace=True
634
        )
635
    except Exception as e:
636
        logger.warning(
637
            f"Couldnt get profiles from FFE, will use pickeld fallback! \n {e}"
638
        )
639
        hh_load_timeseries = pd.read_csv(
640
            "data_bundle_egon_data/demand_regio_backup/df_load_profiles.csv",
641
            index_col="time",
642
        )
643
        hh_load_timeseries.index = pd.to_datetime(
644
            hh_load_timeseries.index, format="%Y-%m-%d %H:%M:%S"
645
        )
646
647
        def change_year(dt, year):
648
            return dt.replace(year=year)
649
650
        year = 2023 if scenario == "status2023" else year  # TODO status2023
651
        hh_load_timeseries.index = hh_load_timeseries.index.map(
652
            lambda dt: change_year(dt, year)
0 ignored issues
show
introduced by
The variable change_year does not seem to be defined for all execution paths.
Loading history...
653
        )
654
655
        if scenario == "status2023":
656
            hh_load_timeseries = hh_load_timeseries.shift(24 * 2)
657
658
            hh_load_timeseries.iloc[: 24 * 7] = hh_load_timeseries.iloc[
659
                24 * 7 : 24 * 7 * 2
660
            ].values
661
662
    write_demandregio_hh_profiles_to_db(hh_load_timeseries)
663
664
665
def insert_cts_ind(scenario, year, engine, target_values):
666
    """Calculates electrical demands of CTS and industry using demandregio's
667
    disaggregator, adjusts them according to resulting values of NEP 2021 or
668
    JRC IDEES and insert results into the database.
669
670
    Parameters
671
    ----------
672
    scenario : str
673
        Name of the corresponing scenario.
674
    year : int
675
        The number of households per region is taken from this year.
676
    target_values : dict
677
        List of target values for each scenario and sector.
678
679
    Returns
680
    -------
681
    None.
682
683
    """
684
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
685
        "targets"
686
    ]
687
688
    wz_table = pd.read_sql(
689
        "SELECT wz, sector FROM demand.egon_demandregio_wz",
690
        con=engine,
691
        index_col="wz",
692
    )
693
694
    # Workaround: Since the disaggregator does not work anymore, data from
695
    # previous runs is used for eGon2035 and eGon100RE
696
    if scenario == "eGon2035":
697
        file2035_path = (
698
            Path(".")
699
            / "data_bundle_egon_data"
700
            / "demand_regio_backup"
701
            / "egon_demandregio_cts_ind_egon2035.csv"
702
        )
703
        ec_cts_ind2 = pd.read_csv(file2035_path)
704
        ec_cts_ind2.to_sql(
705
            targets["cts_ind_demand"]["table"],
706
            engine,
707
            targets["cts_ind_demand"]["schema"],
708
            if_exists="append",
709
            index=False,
710
        )
711
        return
712
713
    if scenario == "eGon100RE":
714
        ec_cts_ind2 = pd.read_csv(
715
            "data_bundle_egon_data/demand_regio_backup/egon_demandregio_cts_ind.csv"
716
        )
717
        ec_cts_ind2["sector"] = ec_cts_ind2["wz"].map(wz_table["sector"])
718
        factor_ind = target_values["industry"] / (
719
            ec_cts_ind2[ec_cts_ind2["sector"] == "industry"]["demand"].sum()
720
            / 1000
721
        )
722
        factor_cts = target_values["CTS"] / (
723
            ec_cts_ind2[ec_cts_ind2["sector"] == "CTS"]["demand"].sum() / 1000
724
        )
725
726
        ec_cts_ind2["demand"] = ec_cts_ind2.apply(
727
            lambda x: (
728
                x["demand"] * factor_ind
0 ignored issues
show
introduced by
The variable factor_ind does not seem to be defined for all execution paths.
Loading history...
729
                if x["sector"] == "industry"
730
                else x["demand"] * factor_cts
0 ignored issues
show
introduced by
The variable factor_cts does not seem to be defined for all execution paths.
Loading history...
731
            ),
732
            axis=1,
733
        )
734
735
        ec_cts_ind2.drop(columns=["sector"], inplace=True)
736
737
        ec_cts_ind2.to_sql(
738
            targets["cts_ind_demand"]["table"],
739
            engine,
740
            targets["cts_ind_demand"]["schema"],
741
            if_exists="append",
742
            index=False,
743
        )
744
        return
745
746
    for sector in ["CTS", "industry"]:
747
        # get demands per nuts3 and wz of demandregio
748
        ec_cts_ind = spatial.disagg_CTS_industry(
749
            use_nuts3code=True, source="power", sector=sector, year=year
750
        ).transpose()
751
752
        ec_cts_ind.index = ec_cts_ind.index.rename("nuts3")
753
754
        # exclude mobility sector from GHD
755
        ec_cts_ind = ec_cts_ind.drop(columns=49, errors="ignore")
756
757
        # scale values according to target_values
758
        if sector in target_values[scenario].keys():
759
            ec_cts_ind *= target_values[sector] * 1e3 / ec_cts_ind.sum().sum()
760
761
        # include new largescale consumers according to NEP 2021
762
        if scenario == "eGon2035":
763
            ec_cts_ind = adjust_cts_ind_nep(ec_cts_ind, sector)
764
        # include new industrial demands due to sector coupling
765
        if (scenario == "eGon100RE") & (sector == "industry"):
766
            ec_cts_ind = adjust_ind_pes(ec_cts_ind)
767
768
        # Select demands for nuts3-regions in boundaries (needed for testmode)
769
        ec_cts_ind = data_in_boundaries(ec_cts_ind)
770
771
        # insert into database
772
        for wz in ec_cts_ind.columns:
773
            df = pd.DataFrame(ec_cts_ind[wz])
774
            df["year"] = year
775
            df["wz"] = wz
776
            df["scenario"] = scenario
777
            df = df.rename({wz: "demand"}, axis="columns")
778
            df.index = df.index.rename("nuts3")
779
            df.to_sql(
780
                targets["cts_ind_demand"]["table"],
781
                engine,
782
                targets["cts_ind_demand"]["schema"],
783
                if_exists="append",
784
            )
785
786
787
def insert_household_demand():
788
    """Insert electrical demands for households according to
789
    demandregio using its disaggregator-tool in MWh
790
791
    Returns
792
    -------
793
    None.
794
795
    """
796
    targets = egon.data.config.datasets()["demandregio_household_demand"][
797
        "targets"
798
    ]
799
    engine = db.engine()
800
801
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
802
803
    scenarios.append("eGon2021")
804
805
    for t in targets:
806
        db.execute_sql(
807
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
808
        )
809
810
    for scn in scenarios:
811
        year = (
812
            2023
813
            if scn == "status2023"
814
            else scenario_parameters.global_settings(scn)["population_year"]
815
        )
816
817
        # Insert demands of private households
818
        insert_hh_demand(scn, year, engine)
819
820
821
def insert_cts_ind_demands():
822
    """Insert electricity demands per nuts3-region in Germany according to
823
    demandregio using its disaggregator-tool in MWh
824
825
    Returns
826
    -------
827
    None.
828
829
    """
830
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
831
        "targets"
832
    ]
833
    engine = db.engine()
834
835
    for t in targets:
836
        db.execute_sql(
837
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
838
        )
839
840
    insert_cts_ind_wz_definitions()
841
842
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
843
844
    scenarios.append("eGon2021")
845
846
    for scn in scenarios:
847
        year = scenario_parameters.global_settings(scn)["population_year"]
848
849
        if year > 2035:
850
            year = 2035
851
852
        # target values per scenario in MWh
853
        # for eGon2021 and status2019 demandregio-data is used without scaling
854
        if scn not in ["eGon2021", "status2019"]:
855
            target_values = {
856
                "CTS": get_sector_parameters("electricity", scenario=scn)[
857
                    "annual_demand"
858
                ]["CTS"],
859
                "industry": get_sector_parameters("electricity", scenario=scn)[
860
                    "annual_demand"
861
                ]["industry"],
862
            }
863
        else:
864
            target_values = None
865
866
        insert_cts_ind(scn, year, engine, target_values)
867
868
    # Insert load curves per wz
869
    timeseries_per_wz()
870
871
872
def insert_society_data():
873
    """Insert population and number of households per nuts3-region in Germany
874
    according to demandregio using its disaggregator-tool
875
876
    Returns
877
    -------
878
    None.
879
880
    """
881
    targets = egon.data.config.datasets()["demandregio_society"]["targets"]
882
    engine = db.engine()
883
884
    for t in targets:
885
        db.execute_sql(
886
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
887
        )
888
889
    target_years = np.append(
890
        get_sector_parameters("global").population_year.values, 2018
891
    )
892
893
    for year in target_years:
894
        df_pop = pd.DataFrame(data.population(year=year))
895
        df_pop["year"] = year
896
        df_pop = df_pop.rename({"value": "population"}, axis="columns")
897
        # Select data for nuts3-regions in boundaries (needed for testmode)
898
        df_pop = data_in_boundaries(df_pop)
899
        df_pop.to_sql(
900
            targets["population"]["table"],
901
            engine,
902
            schema=targets["population"]["schema"],
903
            if_exists="append",
904
        )
905
906
    for year in target_years:
907
        df_hh = pd.DataFrame(data.households_per_size(year=year))
908
        # Select data for nuts3-regions in boundaries (needed for testmode)
909
        df_hh = data_in_boundaries(df_hh)
910
        for hh_size in df_hh.columns:
911
            df = pd.DataFrame(df_hh[hh_size])
912
            df["year"] = year
913
            df["hh_size"] = hh_size
914
            df = df.rename({hh_size: "households"}, axis="columns")
915
            df.to_sql(
916
                targets["household"]["table"],
917
                engine,
918
                schema=targets["household"]["schema"],
919
                if_exists="append",
920
            )
921
922
923
def insert_timeseries_per_wz(sector, year):
924
    """Insert normalized electrical load time series for the selected sector
925
926
    Parameters
927
    ----------
928
    sector : str
929
        Name of the sector. ['CTS', 'industry']
930
    year : int
931
        Selected weather year
932
933
    Returns
934
    -------
935
    None.
936
937
    """
938
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
939
        "targets"
940
    ]
941
942
    if sector == "CTS":
943
        profiles = (
944
            data.CTS_power_slp_generator("SH", year=year)
945
            .drop(
946
                [
947
                    "Day",
948
                    "Hour",
949
                    "DayOfYear",
950
                    "WD",
951
                    "SA",
952
                    "SU",
953
                    "WIZ",
954
                    "SOZ",
955
                    "UEZ",
956
                ],
957
                axis="columns",
958
            )
959
            .resample("H")
960
            .sum()
961
        )
962
        wz_slp = config.slp_branch_cts_power()
963
    elif sector == "industry":
964
        profiles = (
965
            data.shift_load_profile_generator(state="SH", year=year)
966
            .resample("H")
967
            .sum()
968
        )
969
        wz_slp = config.shift_profile_industry()
970
971
    else:
972
        print(f"Sector {sector} is not valid.")
973
974
    df = pd.DataFrame(
975
        index=wz_slp.keys(), columns=["slp", "load_curve", "year"]
0 ignored issues
show
introduced by
The variable wz_slp does not seem to be defined for all execution paths.
Loading history...
976
    )
977
978
    df.index.rename("wz", inplace=True)
979
980
    df.slp = wz_slp.values()
981
982
    df.year = year
983
984
    df.load_curve = profiles[df.slp].transpose().values.tolist()
0 ignored issues
show
introduced by
The variable profiles does not seem to be defined for all execution paths.
Loading history...
985
986
    db.execute_sql(
987
        f"""
988
                   DELETE FROM {targets['timeseries_cts_ind']['schema']}.
989
                   {targets['timeseries_cts_ind']['table']}
990
                   WHERE wz IN (
991
                       SELECT wz FROM {targets['wz_definitions']['schema']}.
992
                       {targets['wz_definitions']['table']}
993
                       WHERE sector = '{sector}')
994
                   """
995
    )
996
997
    df.to_sql(
998
        targets["timeseries_cts_ind"]["table"],
999
        schema=targets["timeseries_cts_ind"]["schema"],
1000
        con=db.engine(),
1001
        if_exists="append",
1002
    )
1003
1004
1005
def timeseries_per_wz():
1006
    """Calcultae and insert normalized timeseries per wz for cts and industry
1007
1008
    Returns
1009
    -------
1010
    None.
1011
1012
    """
1013
1014
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
1015
    year_already_in_database = []
1016
    for scn in scenarios:
1017
        year = int(scenario_parameters.global_settings(scn)["weather_year"])
1018
1019
        for sector in ["CTS", "industry"]:
1020
            if not year in year_already_in_database:
1021
                insert_timeseries_per_wz(sector, int(year))
1022
        year_already_in_database.append(year)
1023
1024
1025
def get_cached_tables():
1026
    """Get cached demandregio tables and db-dump from former runs"""
1027
    data_config = egon.data.config.datasets()
1028
    for s in ["cache", "dbdump"]:
1029
        source_path = data_config["demandregio_workaround"]["source"][s][
1030
            "path"
1031
        ]
1032
        target_path = Path(
1033
            ".", data_config["demandregio_workaround"]["targets"][s]["path"]
1034
        )
1035
        os.makedirs(target_path, exist_ok=True)
1036
1037
        with zipfile.ZipFile(source_path, "r") as zip_ref:
1038
            zip_ref.extractall(path=target_path)
1039