data.datasets.demandregio.insert_hh_demand()   B
last analyzed

Complexity

Conditions 7

Size

Total Lines 83
Code Lines 47

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 47
dl 0
loc 83
rs 7.3345
c 0
b 0
f 0
cc 7
nop 3

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""The central module containing all code dealing with importing and
2
adjusting data from demandRegio
3
4
"""
5
6
from pathlib import Path
7
import os
8
import zipfile
9
10
from sqlalchemy import ARRAY, Column, Float, ForeignKey, Integer, String
11
from sqlalchemy.ext.declarative import declarative_base
12
import numpy as np
13
import pandas as pd
14
15
from egon.data import db, logger
16
from egon.data.datasets import Dataset, wrapped_partial
17
from egon.data.datasets.demandregio.install_disaggregator import (
18
    clone_and_install,
19
)
20
from egon.data.datasets.scenario_parameters import (
21
    EgonScenario,
22
    get_sector_parameters,
23
)
24
from egon.data.datasets.zensus import download_and_check
25
import egon.data.config
26
import egon.data.datasets.scenario_parameters.parameters as scenario_parameters
27
28
try:
29
    from disaggregator import config, data, spatial, temporal
30
31
except ImportError as e:
32
    pass
33
34
# will be later imported from another file ###
35
Base = declarative_base()
36
37
38
class DemandRegio(Dataset):
39
    """
40
    Extract and adjust data from DemandRegio
41
42
    Demand data for the sectors households, CTS and industry are calculated
43
    using DemandRegio's diaggregator and input data. To bring the resulting
44
    data in line with other data used in eGon-data and the eGon project in
45
    general some data needed to be adjusted or extended, e.g. in function
46
    :py:func:`adjust_ind_pes` or function :py:func:`adjust_cts_ind_nep`. The
47
    resulting data is written into newly created tables.
48
49
    *Dependencies*
50
      * :py:class:`DataBundle <egon.data.datasets.data_bundle.DataBundle>`
51
      * :py:class:`ScenarioParameters <egon.data.datasets.scenario_parameters.ScenarioParameters>`
52
      * :py:class:`ZensusVg250 <egon.data.datasets.zensus_vg250.ZensusVg250>`
53
54
    *Resulting tables*
55
      * :py:class:`demand.egon_demandregio_hh <egon.data.datasets.demandregio.EgonDemandRegioHH>` is created and filled
56
      * :py:class:`demand.egon_demandregio_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioCtsInd>` is created and filled
57
      * :py:class:`society.egon_demandregio_population <egon.data.datasets.demandregio.EgonDemandRegioPopulation>` is created and filled
58
      * :py:class:`society.egon_demandregio_household <egon.data.datasets.demandregio.EgonDemandRegioHouseholds>` is created and filled
59
      * :py:class:`demand.egon_demandregio_wz <egon.data.datasets.demandregio.EgonDemandRegioWz>` is created and filled
60
      * :py:class:`demand.egon_demandregio_timeseries_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioTimeseriesCtsInd>` is created and filled
61
62
    """
63
64
    #:
65
    name: str = "DemandRegio"
66
    #:
67
    version: str = "0.0.11"
68
69
    def __init__(self, dependencies):
70
        super().__init__(
71
            name=self.name,
72
            version=self.version,
73
            dependencies=dependencies,
74
            tasks=(
75
                # clone_and_install,  # demandregio must be previously installed
76
                get_cached_tables,  # adhoc workaround #180
77
                create_tables,
78
                {
79
                    insert_household_demand,
80
                    insert_society_data,
81
                    insert_cts_ind_demands,
82
                },
83
            ),
84
        )
85
86
87
class DemandRegioLoadProfiles(Base):
88
    __tablename__ = "demandregio_household_load_profiles"
89
    __table_args__ = {"schema": "demand"}
90
91
    id = Column(Integer, primary_key=True)
92
    year = Column(Integer)
93
    nuts3 = Column(String)
94
    load_in_mwh = Column(ARRAY(Float()))
95
96
97
class EgonDemandRegioHH(Base):
98
    __tablename__ = "egon_demandregio_hh"
99
    __table_args__ = {"schema": "demand"}
100
    nuts3 = Column(String(5), primary_key=True)
101
    hh_size = Column(Integer, primary_key=True)
102
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
103
    year = Column(Integer)
104
    demand = Column(Float)
105
106
107
class EgonDemandRegioCtsInd(Base):
108
    __tablename__ = "egon_demandregio_cts_ind"
109
    __table_args__ = {"schema": "demand"}
110
    nuts3 = Column(String(5), primary_key=True)
111
    wz = Column(Integer, primary_key=True)
112
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
113
    year = Column(Integer)
114
    demand = Column(Float)
115
116
117
class EgonDemandRegioPopulation(Base):
118
    __tablename__ = "egon_demandregio_population"
119
    __table_args__ = {"schema": "society"}
120
    nuts3 = Column(String(5), primary_key=True)
121
    year = Column(Integer, primary_key=True)
122
    population = Column(Float)
123
124
125
class EgonDemandRegioHouseholds(Base):
126
    __tablename__ = "egon_demandregio_household"
127
    __table_args__ = {"schema": "society"}
128
    nuts3 = Column(String(5), primary_key=True)
129
    hh_size = Column(Integer, primary_key=True)
130
    year = Column(Integer, primary_key=True)
131
    households = Column(Integer)
132
133
134
class EgonDemandRegioWz(Base):
135
    __tablename__ = "egon_demandregio_wz"
136
    __table_args__ = {"schema": "demand"}
137
    wz = Column(Integer, primary_key=True)
138
    sector = Column(String(50))
139
    definition = Column(String(150))
140
141
142
class EgonDemandRegioTimeseriesCtsInd(Base):
143
    __tablename__ = "egon_demandregio_timeseries_cts_ind"
144
    __table_args__ = {"schema": "demand"}
145
    wz = Column(Integer, primary_key=True)
146
    year = Column(Integer, primary_key=True)
147
    slp = Column(String(50))
148
    load_curve = Column(ARRAY(Float()))
149
150
151
def create_tables():
152
    """Create tables for demandregio data
153
    Returns
154
    -------
155
    None.
156
    """
157
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;")
158
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS society;")
159
    engine = db.engine()
160
    EgonDemandRegioHH.__table__.create(bind=engine, checkfirst=True)
161
    EgonDemandRegioCtsInd.__table__.create(bind=engine, checkfirst=True)
162
    EgonDemandRegioPopulation.__table__.create(bind=engine, checkfirst=True)
163
    EgonDemandRegioHouseholds.__table__.create(bind=engine, checkfirst=True)
164
    EgonDemandRegioWz.__table__.create(bind=engine, checkfirst=True)
165
    DemandRegioLoadProfiles.__table__.create(bind=db.engine(), checkfirst=True)
166
    EgonDemandRegioTimeseriesCtsInd.__table__.drop(
167
        bind=engine, checkfirst=True
168
    )
169
    EgonDemandRegioTimeseriesCtsInd.__table__.create(
170
        bind=engine, checkfirst=True
171
    )
172
173
174
def data_in_boundaries(df):
175
    """Select rows with nuts3 code within boundaries, used for testmode
176
177
    Parameters
178
    ----------
179
    df : pandas.DataFrame
180
        Data for all nuts3 regions
181
182
    Returns
183
    -------
184
    pandas.DataFrame
185
        Data for nuts3 regions within boundaries
186
187
    """
188
    engine = db.engine()
189
190
    df = df.reset_index()
191
192
    # Change nuts3 region names to 2016 version
193
    nuts_names = {"DEB16": "DEB1C", "DEB19": "DEB1D"}
194
    df.loc[df.nuts3.isin(nuts_names), "nuts3"] = df.loc[
195
        df.nuts3.isin(nuts_names), "nuts3"
196
    ].map(nuts_names)
197
198
    df = df.set_index("nuts3")
199
200
    return df[
201
        df.index.isin(
202
            pd.read_sql(
203
                "SELECT DISTINCT ON (nuts) nuts FROM boundaries.vg250_krs",
204
                engine,
205
            ).nuts
206
        )
207
    ]
208
209
210
def insert_cts_ind_wz_definitions():
211
    """Insert demandregio's definitions of CTS and industrial branches
212
213
    Returns
214
    -------
215
    None.
216
217
    """
218
219
    source = egon.data.config.datasets()["demandregio_cts_ind_demand"][
220
        "sources"
221
    ]
222
223
    target = egon.data.config.datasets()["demandregio_cts_ind_demand"][
224
        "targets"
225
    ]["wz_definitions"]
226
227
    engine = db.engine()
228
229
    for sector in source["wz_definitions"]:
230
        file_path = (
231
            Path(".")
232
            / "data_bundle_egon_data"
233
            / "WZ_definition"
234
            / source["wz_definitions"][sector]
235
        )
236
237
        if sector == "CTS":
238
            delimiter = ";"
239
        else:
240
            delimiter = ","
241
        df = (
242
            pd.read_csv(file_path, delimiter=delimiter, header=None)
243
            .rename({0: "wz", 1: "definition"}, axis="columns")
244
            .set_index("wz")
245
        )
246
        df["sector"] = sector
247
        df.to_sql(
248
            target["table"],
249
            engine,
250
            schema=target["schema"],
251
            if_exists="append",
252
        )
253
254
255
def match_nuts3_bl():
256
    """Function that maps the federal state to each nuts3 region
257
258
    Returns
259
    -------
260
    df : pandas.DataFrame
261
        List of nuts3 regions and the federal state of Germany.
262
263
    """
264
265
    engine = db.engine()
266
267
    df = pd.read_sql(
268
        "SELECT DISTINCT ON (boundaries.vg250_krs.nuts) "
269
        "boundaries.vg250_krs.nuts, boundaries.vg250_lan.gen "
270
        "FROM boundaries.vg250_lan, boundaries.vg250_krs "
271
        " WHERE ST_CONTAINS("
272
        "boundaries.vg250_lan.geometry, "
273
        "boundaries.vg250_krs.geometry)",
274
        con=engine,
275
    )
276
277
    df.gen[df.gen == "Baden-Württemberg (Bodensee)"] = "Baden-Württemberg"
278
    df.gen[df.gen == "Bayern (Bodensee)"] = "Bayern"
279
280
    return df.set_index("nuts")
281
282
283
def adjust_ind_pes(ec_cts_ind):
284
    """
285
    Adjust electricity demand of industrial consumers due to electrification
286
    of process heat based on assumptions of pypsa-eur-sec.
287
288
    Parameters
289
    ----------
290
    ec_cts_ind : pandas.DataFrame
291
        Industrial demand without additional electrification
292
293
    Returns
294
    -------
295
    ec_cts_ind : pandas.DataFrame
296
        Industrial demand with additional electrification
297
298
    """
299
300
    pes_path = (
301
        Path(".") / "data_bundle_powerd_data" / "pypsa_eur" / "resources"
302
    )
303
304
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
305
        "sources"
306
    ]["new_consumers_2050"]
307
308
    # Extract today's industrial demand from pypsa-eur-sec
309
    demand_today = pd.read_csv(
310
        pes_path / sources["pes-demand-today"],
311
        header=None,
312
    ).transpose()
313
314
    # Filter data
315
    demand_today[1].fillna("carrier", inplace=True)
316
    demand_today = demand_today[
317
        (demand_today[0] == "DE") | (demand_today[1] == "carrier")
318
    ].drop([0, 2], axis="columns")
319
320
    demand_today = (
321
        demand_today.transpose()
322
        .set_index(0)
323
        .transpose()
324
        .set_index("carrier")
325
        .transpose()
326
        .loc["electricity"]
327
        .astype(float)
328
    )
329
330
    # Calculate future industrial demand from pypsa-eur-sec
331
    # based on production and energy demands per carrier ('sector ratios')
332
    prod_tomorrow = pd.read_csv(pes_path / sources["pes-production-tomorrow"])
333
334
    prod_tomorrow = prod_tomorrow[prod_tomorrow["kton/a"] == "DE"].set_index(
335
        "kton/a"
336
    )
337
338
    sector_ratio = (
339
        pd.read_csv(pes_path / sources["pes-sector-ratios"])
340
        .set_index("MWh/tMaterial")
341
        .loc["elec"]
342
    )
343
344
    demand_tomorrow = prod_tomorrow.multiply(
345
        sector_ratio.div(1000)
346
    ).transpose()["DE"]
347
348
    # Calculate changes of electrical demand per sector in pypsa-eur-sec
349
    change = pd.DataFrame(
350
        (demand_tomorrow / demand_today)
351
        / (demand_tomorrow / demand_today).sum()
352
    )
353
354
    # Drop rows without changes
355
    change = change[~change[0].isnull()]
356
357
    # Map industrial branches of pypsa-eur-sec to WZ2008 used in demandregio
358
    change["wz"] = change.index.map(
359
        {
360
            "Alumina production": 24,
361
            "Aluminium - primary production": 24,
362
            "Aluminium - secondary production": 24,
363
            "Ammonia": 20,
364
            "Basic chemicals (without ammonia)": 20,
365
            "Cement": 23,
366
            "Ceramics & other NMM": 23,
367
            "Electric arc": 24,
368
            "Food, beverages and tobacco": 10,
369
            "Glass production": 23,
370
            "Integrated steelworks": 24,
371
            "Machinery Equipment": 28,
372
            "Other Industrial Sectors": 32,
373
            "Other chemicals": 20,
374
            "Other non-ferrous metals": 24,
375
            "Paper production": 17,
376
            "Pharmaceutical products etc.": 21,
377
            "Printing and media reproduction": 18,
378
            "Pulp production": 17,
379
            "Textiles and leather": 13,
380
            "Transport Equipment": 29,
381
            "Wood and wood products": 16,
382
        }
383
    )
384
385
    # Group by WZ2008
386
    shares_per_wz = change.groupby("wz")[0].sum()
387
388
    # Calculate addtional demands needed to meet future demand of pypsa-eur-sec
389
    addtional_mwh = shares_per_wz.multiply(
390
        demand_tomorrow.sum() * 1000000 - ec_cts_ind.sum().sum()
391
    )
392
393
    # Calulate overall industrial demand for eGon100RE
394
    final_mwh = addtional_mwh + ec_cts_ind[addtional_mwh.index].sum()
395
396
    # Linear scale the industrial demands per nuts3 and wz to meet final demand
397
    ec_cts_ind[addtional_mwh.index] *= (
398
        final_mwh / ec_cts_ind[addtional_mwh.index].sum()
399
    )
400
401
    return ec_cts_ind
402
403
404
def adjust_cts_ind_nep(ec_cts_ind, sector):
405
    """Add electrical demand of new largescale CTS und industrial consumers
406
    according to NEP 2021, scneario C 2035. Values per federal state are
407
    linear distributed over all CTS branches and nuts3 regions.
408
409
    Parameters
410
    ----------
411
    ec_cts_ind : pandas.DataFrame
412
        CTS or industry demand without new largescale consumers.
413
414
    Returns
415
    -------
416
    ec_cts_ind : pandas.DataFrame
417
        CTS or industry demand including new largescale consumers.
418
419
    """
420
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
421
        "sources"
422
    ]
423
424
    file_path = (
425
        Path(".")
426
        / "data_bundle_egon_data"
427
        / "nep2035_version2021"
428
        / sources["new_consumers_2035"]
429
    )
430
431
    # get data from NEP per federal state
432
    new_con = pd.read_csv(file_path, delimiter=";", decimal=",", index_col=0)
433
434
    # match nuts3 regions to federal states
435
    groups = ec_cts_ind.groupby(match_nuts3_bl().gen)
436
437
    # update demands per federal state
438
    for group in groups.indices.keys():
439
        g = groups.get_group(group)
440
        data_new = g.mul(1 + new_con[sector][group] * 1e6 / g.sum().sum())
441
        ec_cts_ind[ec_cts_ind.index.isin(g.index)] = data_new
442
443
    return ec_cts_ind
444
445
446
def disagg_households_power(
447
    scenario, year, weight_by_income=False, original=False, **kwargs
448
):
449
    """
450
    Perform spatial disaggregation of electric power in [GWh/a] by key and
451
    possibly weight by income.
452
    Similar to disaggregator.spatial.disagg_households_power
453
454
455
    Parameters
456
    ----------
457
    by : str
458
        must be one of ['households', 'population']
459
    weight_by_income : bool, optional
460
        Flag if to weight the results by the regional income (default False)
461
    orignal : bool, optional
462
        Throughput to function households_per_size,
463
        A flag if the results should be left untouched and returned in
464
        original form for the year 2011 (True) or if they should be scaled to
465
        the given `year` by the population in that year (False).
466
467
    Returns
468
    -------
469
    pd.DataFrame or pd.Series
470
    """
471
    # source: survey of energieAgenturNRW
472
    # with/without direct water heating (DHW), and weighted average
473
    # https://1-stromvergleich.com/wp-content/uploads/erhebung_wo_bleibt_der_strom.pdf
474
    demand_per_hh_size = pd.DataFrame(
475
        index=range(1, 7),
476
        data={
477
            # "weighted DWH": [2290, 3202, 4193, 4955, 5928, 5928],
478
            # "without DHW": [1714, 2812, 3704, 4432, 5317, 5317],
479
            "with_DHW": [2181, 3843, 5151, 6189, 7494, 8465],
480
            "without_DHW": [1798, 2850, 3733, 4480, 5311, 5816],
481
            "weighted": [2256, 3248, 4246, 5009, 5969, 6579],
482
        },
483
    )
484
485
    if scenario == "eGon100RE":
486
        # chose demand per household size from survey without DHW
487
        power_per_HH = (
488
            demand_per_hh_size["without_DHW"] / 1e3
489
        )  # TODO why without?
490
491
        # calculate demand per nuts3 in 2011
492
        df_2011 = data.households_per_size(year=2011) * power_per_HH
493
494
        # scale demand per hh-size to meet demand without heat
495
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
496
        # TODO check source and method
497
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
498
499
        # calculate demand per nuts3 in 2050
500
        df = data.households_per_size(year=year) * power_per_HH
501
502
    # Bottom-Up: Power demand by household sizes in [MWh/a] for each scenario
503
    elif scenario in ["status2019", "status2023", "eGon2021", "eGon2035"]:
504
        # chose demand per household size from survey including weighted DHW
505
        power_per_HH = demand_per_hh_size["weighted"] / 1e3
506
507
        # calculate demand per nuts3
508
        df = (
509
            data.households_per_size(original=original, year=year)
510
            * power_per_HH
511
        )
512
513
        if scenario == "eGon2035":
514
            # scale to fit demand of NEP 2021 scebario C 2035 (119TWh)
515
            df *= 119 * 1e6 / df.sum().sum()
516
517
        if scenario == "status2023":
518
            # scale to fit demand of BDEW 2023 (130.48 TWh) see issue #180
519
            df *= 130.48 * 1e6 / df.sum().sum()
520
521
        # if scenario == "status2021": # TODO status2021
522
        #     # scale to fit demand of AGEB 2021 (138.6 TWh)
523
        #     # https://ag-energiebilanzen.de/wp-content/uploads/2023/01/AGEB_22p2_rev-1.pdf#page=10
524
        #     df *= 138.6 * 1e6 / df.sum().sum()
525
526
    elif scenario == "eGon100RE":
527
        # chose demand per household size from survey without DHW
528
        power_per_HH = demand_per_hh_size["without DHW"] / 1e3
529
530
        # calculate demand per nuts3 in 2011
531
        df_2011 = data.households_per_size(year=2011) * power_per_HH
532
533
        # scale demand per hh-size to meet demand without heat
534
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
535
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
536
537
        # calculate demand per nuts3 in 2050
538
        df = data.households_per_size(year=year) * power_per_HH
539
540
        # scale to meet annual demand from NEP 2023, scenario B 2045
541
        df *= 90400000 / df.sum().sum()
542
543
    else:
544
        print(
545
            f"Electric demand per household size for scenario {scenario} "
546
            "is not specified."
547
        )
548
549
    if weight_by_income:
550
        df = spatial.adjust_by_income(df=df)
0 ignored issues
show
introduced by
The variable df does not seem to be defined for all execution paths.
Loading history...
551
552
    return df
553
554
555
def write_demandregio_hh_profiles_to_db(hh_profiles):
556
    """Write HH demand profiles from demand regio into db. One row per
557
    year and nuts3. The annual load profile timeseries is an array.
558
559
    schema: demand
560
    tablename: demandregio_household_load_profiles
561
562
563
564
    Parameters
565
    ----------
566
    hh_profiles: pd.DataFrame
567
568
    Returns
569
    -------
570
    """
571
    years = hh_profiles.index.year.unique().values
572
    df_to_db = pd.DataFrame(
573
        columns=["id", "year", "nuts3", "load_in_mwh"]
574
    ).set_index("id")
575
    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]
576
577
    if dataset == "Schleswig-Holstein":
578
        hh_profiles = hh_profiles.loc[
579
            :, hh_profiles.columns.str.contains("DEF0")
580
        ]
581
582
    idx = pd.read_sql_query(
583
        f"""
584
                           SELECT MAX(id)
585
                           FROM {DemandRegioLoadProfiles.__table__.schema}.
586
                           {DemandRegioLoadProfiles.__table__.name}
587
                           """,
588
        con=db.engine(),
589
    ).iat[0, 0]
590
591
    idx = 0 if idx is None else idx + 1
592
593
    for year in years:
594
        df = hh_profiles[hh_profiles.index.year == year]
595
596
        for nuts3 in hh_profiles.columns:
597
            idx+=1
598
            df_to_db.at[idx, "year"] = year
599
            df_to_db.at[idx, "nuts3"] = nuts3
600
            df_to_db.at[idx, "load_in_mwh"] = df[nuts3].to_list()
601
602
    df_to_db["year"] = df_to_db["year"].apply(int)
603
    df_to_db["nuts3"] = df_to_db["nuts3"].astype(str)
604
    df_to_db["load_in_mwh"] = df_to_db["load_in_mwh"].apply(list)
605
    df_to_db = df_to_db.reset_index()
606
607
    df_to_db.to_sql(
608
        name=DemandRegioLoadProfiles.__table__.name,
609
        schema=DemandRegioLoadProfiles.__table__.schema,
610
        con=db.engine(),
611
        if_exists="append",
612
        index=-False,
613
    )
614
615
616
def insert_hh_demand(scenario, year, engine):
617
    """Calculates electrical demands of private households using demandregio's
618
    disaggregator and insert results into the database.
619
620
    Parameters
621
    ----------
622
    scenario : str
623
        Name of the corresponding scenario.
624
    year : int
625
        The number of households per region is taken from this year.
626
627
    Returns
628
    -------
629
    None.
630
631
    """
632
    targets = egon.data.config.datasets()["demandregio_household_demand"][
633
        "targets"
634
    ]["household_demand"]
635
    # get demands of private households per nuts and size from demandregio
636
    ec_hh = disagg_households_power(scenario, year)
637
638
    # Select demands for nuts3-regions in boundaries (needed for testmode)
639
    ec_hh = data_in_boundaries(ec_hh)
640
641
    # insert into database
642
    for hh_size in ec_hh.columns:
643
        df = pd.DataFrame(ec_hh[hh_size])
644
        df["year"] = 2023 if scenario == "status2023" else year # TODO status2023
645
        # adhoc fix until ffeopendata servers are up and population_year can be set
646
647
        df["scenario"] = scenario
648
        df["hh_size"] = hh_size
649
        df = df.rename({hh_size: "demand"}, axis="columns")
650
        df.to_sql(
651
            targets["table"],
652
            engine,
653
            schema=targets["schema"],
654
            if_exists="append",
655
        )
656
657
    # insert housholds demand timeseries
658
    try:
659
        hh_load_timeseries = (
660
            temporal.disagg_temporal_power_housholds_slp(
661
                use_nuts3code=True,
662
                by="households",
663
                weight_by_income=False,
664
                year=year,
665
            )
666
            .resample("h")
667
            .sum()
668
        )
669
        hh_load_timeseries.rename(
670
            columns={"DEB16": "DEB1C", "DEB19": "DEB1D"}, inplace=True)
671
    except Exception as e:
672
        logger.warning(
673
            f"Couldnt get profiles from FFE, will use pickeld fallback! \n {e}"
674
        )
675
        hh_load_timeseries = pd.read_csv(
676
            "data_bundle_egon_data/demand_regio_backup/df_load_profiles.csv",
677
            index_col="time"
678
        )
679
        hh_load_timeseries.index = pd.to_datetime(
680
            hh_load_timeseries.index, format="%Y-%m-%d %H:%M:%S"
681
        )
682
683
        def change_year(dt, year):
684
            return dt.replace(year=year)
685
686
        year = 2023 if scenario == "status2023" else year  # TODO status2023
687
        hh_load_timeseries.index = hh_load_timeseries.index.map(
688
            lambda dt: change_year(dt, year)
0 ignored issues
show
introduced by
The variable change_year does not seem to be defined for all execution paths.
Loading history...
689
        )
690
691
        if scenario == "status2023":
692
            hh_load_timeseries = hh_load_timeseries.shift(24 * 2)
693
694
            hh_load_timeseries.iloc[: 24 * 7] = hh_load_timeseries.iloc[
695
                24 * 7 : 24 * 7 * 2
696
            ].values
697
698
    write_demandregio_hh_profiles_to_db(hh_load_timeseries)
699
700
701
def insert_cts_ind(scenario, year, engine, target_values):
702
    """Calculates electrical demands of CTS and industry using demandregio's
703
    disaggregator, adjusts them according to resulting values of NEP 2021 or
704
    JRC IDEES and insert results into the database.
705
706
    Parameters
707
    ----------
708
    scenario : str
709
        Name of the corresponing scenario.
710
    year : int
711
        The number of households per region is taken from this year.
712
    target_values : dict
713
        List of target values for each scenario and sector.
714
715
    Returns
716
    -------
717
    None.
718
719
    """
720
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
721
        "targets"
722
    ]
723
724
    wz_table = pd.read_sql("SELECT wz, sector FROM demand.egon_demandregio_wz",
725
                           con = engine,
726
                           index_col = "wz")
727
728
    # Workaround: Since the disaggregator does not work anymore, data from
729
    # previous runs is used for eGon2035 and eGon100RE
730
    if scenario == "eGon2035":
731
        file2035_path = (
732
            Path(".")
733
            / "data_bundle_egon_data"
734
            / "demand_regio_backup"
735
            / "egon_demandregio_cts_ind_egon2035.csv"
736
        )
737
        ec_cts_ind2 = pd.read_csv(file2035_path)
738
        ec_cts_ind2.to_sql(
739
            targets["cts_ind_demand"]["table"],
740
            engine,
741
            targets["cts_ind_demand"]["schema"],
742
            if_exists="append",
743
            index=False,
744
        )
745
        return
746
747
    if scenario == "eGon100RE":
748
        ec_cts_ind2 = pd.read_csv(
749
            "data_bundle_egon_data/demand_regio_backup/egon_demandregio_cts_ind.csv"
750
        )
751
        ec_cts_ind2["sector"] = ec_cts_ind2["wz"].map(wz_table["sector"])
752
        factor_ind = target_values[scenario]["industry"] / (
753
            ec_cts_ind2[ec_cts_ind2["sector"] == "industry"]["demand"].sum()
754
            / 1000
755
        )
756
        factor_cts = target_values[scenario]["CTS"] / (
757
            ec_cts_ind2[ec_cts_ind2["sector"] == "CTS"]["demand"].sum() / 1000
758
        )
759
760
        ec_cts_ind2["demand"] = ec_cts_ind2.apply(
761
            lambda x: (
762
                x["demand"] * factor_ind
0 ignored issues
show
introduced by
The variable factor_ind does not seem to be defined for all execution paths.
Loading history...
763
                if x["sector"] == "industry"
764
                else x["demand"] * factor_cts
0 ignored issues
show
introduced by
The variable factor_cts does not seem to be defined for all execution paths.
Loading history...
765
            ),
766
            axis=1,
767
        )
768
769
        ec_cts_ind2.drop(columns=["sector"], inplace = True)
770
771
        ec_cts_ind2.to_sql(
772
            targets["cts_ind_demand"]["table"],
773
            engine,
774
            targets["cts_ind_demand"]["schema"],
775
            if_exists="append",
776
            index=False,
777
        )
778
        return
779
780
    for sector in ["CTS", "industry"]:
781
        # get demands per nuts3 and wz of demandregio
782
        ec_cts_ind = spatial.disagg_CTS_industry(
783
            use_nuts3code=True, source="power", sector=sector, year=year
784
        ).transpose()
785
786
        ec_cts_ind.index = ec_cts_ind.index.rename("nuts3")
787
788
        # exclude mobility sector from GHD
789
        ec_cts_ind = ec_cts_ind.drop(columns=49, errors="ignore")
790
791
        # scale values according to target_values
792
        if sector in target_values[scenario].keys():
793
            ec_cts_ind *= (
794
                target_values[scenario][sector] * 1e3 / ec_cts_ind.sum().sum()
795
            )
796
797
        # include new largescale consumers according to NEP 2021
798
        if scenario == "eGon2035":
799
            ec_cts_ind = adjust_cts_ind_nep(ec_cts_ind, sector)
800
        # include new industrial demands due to sector coupling
801
        if (scenario == "eGon100RE") & (sector == "industry"):
802
            ec_cts_ind = adjust_ind_pes(ec_cts_ind)
803
804
        # Select demands for nuts3-regions in boundaries (needed for testmode)
805
        ec_cts_ind = data_in_boundaries(ec_cts_ind)
806
807
        # insert into database
808
        for wz in ec_cts_ind.columns:
809
            df = pd.DataFrame(ec_cts_ind[wz])
810
            df["year"] = year
811
            df["wz"] = wz
812
            df["scenario"] = scenario
813
            df = df.rename({wz: "demand"}, axis="columns")
814
            df.index = df.index.rename("nuts3")
815
            df.to_sql(
816
                targets["cts_ind_demand"]["table"],
817
                engine,
818
                targets["cts_ind_demand"]["schema"],
819
                if_exists="append",
820
            )
821
822
823
def insert_household_demand():
824
    """Insert electrical demands for households according to
825
    demandregio using its disaggregator-tool in MWh
826
827
    Returns
828
    -------
829
    None.
830
831
    """
832
    targets = egon.data.config.datasets()["demandregio_household_demand"][
833
        "targets"
834
    ]
835
    engine = db.engine()
836
837
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
838
839
    scenarios.append("eGon2021")
840
841
    for t in targets:
842
        db.execute_sql(
843
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
844
        )
845
846
    for scn in scenarios:
847
        year = (
848
            2023 if scn == "status2023"
849
            else scenario_parameters.global_settings(scn)["population_year"]
850
        )
851
852
        # Insert demands of private households
853
        insert_hh_demand(scn, year, engine)
854
855
856
def insert_cts_ind_demands():
857
    """Insert electricity demands per nuts3-region in Germany according to
858
    demandregio using its disaggregator-tool in MWh
859
860
    Returns
861
    -------
862
    None.
863
864
    """
865
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
866
        "targets"
867
    ]
868
    engine = db.engine()
869
870
    for t in targets:
871
        db.execute_sql(
872
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
873
        )
874
875
    insert_cts_ind_wz_definitions()
876
877
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
878
879
    scenarios.append("eGon2021")
880
881
    for scn in scenarios:
882
        year = scenario_parameters.global_settings(scn)["population_year"]
883
884
        if year > 2035:
885
            year = 2035
886
887
        # target values per scenario in MWh
888
        target_values = {
889
            # according to NEP 2021
890
            # new consumers will be added seperatly
891
            "eGon2035": {"CTS": 135300, "industry": 225400},
892
            # CTS: reduce overall demand from demandregio (without traffic)
893
            # by share of heat according to JRC IDEES, data from 2011
894
            # industry: no specific heat demand, use data from demandregio
895
            "eGon100RE": {"CTS": 146700, "industry": 382900},
896
            # no adjustments for status quo
897
            "eGon2021": {},
898
            "status2019": {},
899
            "status2023": {
900
                "CTS": 121160 * 1e3,
901
                "industry": 200380 * 1e3
902
            },
903
        }
904
905
        insert_cts_ind(scn, year, engine, target_values)
906
907
    # Insert load curves per wz
908
    timeseries_per_wz()
909
910
911
def insert_society_data():
912
    """Insert population and number of households per nuts3-region in Germany
913
    according to demandregio using its disaggregator-tool
914
915
    Returns
916
    -------
917
    None.
918
919
    """
920
    targets = egon.data.config.datasets()["demandregio_society"]["targets"]
921
    engine = db.engine()
922
923
    for t in targets:
924
        db.execute_sql(
925
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
926
        )
927
928
    target_years = np.append(
929
        get_sector_parameters("global").population_year.values, 2018
930
    )
931
932
    for year in target_years:
933
        df_pop = pd.DataFrame(data.population(year=year))
934
        df_pop["year"] = year
935
        df_pop = df_pop.rename({"value": "population"}, axis="columns")
936
        # Select data for nuts3-regions in boundaries (needed for testmode)
937
        df_pop = data_in_boundaries(df_pop)
938
        df_pop.to_sql(
939
            targets["population"]["table"],
940
            engine,
941
            schema=targets["population"]["schema"],
942
            if_exists="append",
943
        )
944
945
    for year in target_years:
946
        df_hh = pd.DataFrame(data.households_per_size(year=year))
947
        # Select data for nuts3-regions in boundaries (needed for testmode)
948
        df_hh = data_in_boundaries(df_hh)
949
        for hh_size in df_hh.columns:
950
            df = pd.DataFrame(df_hh[hh_size])
951
            df["year"] = year
952
            df["hh_size"] = hh_size
953
            df = df.rename({hh_size: "households"}, axis="columns")
954
            df.to_sql(
955
                targets["household"]["table"],
956
                engine,
957
                schema=targets["household"]["schema"],
958
                if_exists="append",
959
            )
960
961
962
def insert_timeseries_per_wz(sector, year):
963
    """Insert normalized electrical load time series for the selected sector
964
965
    Parameters
966
    ----------
967
    sector : str
968
        Name of the sector. ['CTS', 'industry']
969
    year : int
970
        Selected weather year
971
972
    Returns
973
    -------
974
    None.
975
976
    """
977
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
978
        "targets"
979
    ]
980
981
    if sector == "CTS":
982
        profiles = (
983
            data.CTS_power_slp_generator("SH", year=year)
984
            .drop(
985
                [
986
                    "Day",
987
                    "Hour",
988
                    "DayOfYear",
989
                    "WD",
990
                    "SA",
991
                    "SU",
992
                    "WIZ",
993
                    "SOZ",
994
                    "UEZ",
995
                ],
996
                axis="columns",
997
            )
998
            .resample("H")
999
            .sum()
1000
        )
1001
        wz_slp = config.slp_branch_cts_power()
1002
    elif sector == "industry":
1003
        profiles = (
1004
            data.shift_load_profile_generator(state="SH", year=year)
1005
            .resample("H")
1006
            .sum()
1007
        )
1008
        wz_slp = config.shift_profile_industry()
1009
1010
    else:
1011
        print(f"Sector {sector} is not valid.")
1012
1013
    df = pd.DataFrame(
1014
        index=wz_slp.keys(), columns=["slp", "load_curve", "year"]
0 ignored issues
show
introduced by
The variable wz_slp does not seem to be defined for all execution paths.
Loading history...
1015
    )
1016
1017
    df.index.rename("wz", inplace=True)
1018
1019
    df.slp = wz_slp.values()
1020
1021
    df.year = year
1022
1023
    df.load_curve = profiles[df.slp].transpose().values.tolist()
0 ignored issues
show
introduced by
The variable profiles does not seem to be defined for all execution paths.
Loading history...
1024
1025
    db.execute_sql(
1026
        f"""
1027
                   DELETE FROM {targets['timeseries_cts_ind']['schema']}.
1028
                   {targets['timeseries_cts_ind']['table']}
1029
                   WHERE wz IN (
1030
                       SELECT wz FROM {targets['wz_definitions']['schema']}.
1031
                       {targets['wz_definitions']['table']}
1032
                       WHERE sector = '{sector}')
1033
                   """
1034
    )
1035
1036
    df.to_sql(
1037
        targets["timeseries_cts_ind"]["table"],
1038
        schema=targets["timeseries_cts_ind"]["schema"],
1039
        con=db.engine(),
1040
        if_exists="append",
1041
    )
1042
1043
1044
def timeseries_per_wz():
1045
    """Calcultae and insert normalized timeseries per wz for cts and industry
1046
1047
    Returns
1048
    -------
1049
    None.
1050
1051
    """
1052
1053
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
1054
    year_already_in_database = []
1055
    for scn in scenarios:
1056
        year = int(scenario_parameters.global_settings(scn)["weather_year"])
1057
1058
        for sector in ["CTS", "industry"]:
1059
            if not year in year_already_in_database:
1060
                insert_timeseries_per_wz(sector, int(year))
1061
        year_already_in_database.append(year)
1062
1063
1064
def get_cached_tables():
1065
    """Get cached demandregio tables and db-dump from former runs"""
1066
    data_config = egon.data.config.datasets()
1067
    for s in ["cache", "dbdump"]:
1068
        source_path = data_config["demandregio_workaround"]["source"][s][
1069
            "path"
1070
        ]
1071
        target_path = Path(
1072
            ".", data_config["demandregio_workaround"]["targets"][s]["path"]
1073
        )
1074
        os.makedirs(target_path, exist_ok=True)
1075
1076
        with zipfile.ZipFile(source_path, "r") as zip_ref:
1077
            zip_ref.extractall(path=target_path)
1078
1079