data.datasets.demandregio   F
last analyzed

Complexity

Total Complexity 62

Size/Duplication

Total Lines 1077
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 62
eloc 539
dl 0
loc 1077
rs 3.44
c 0
b 0
f 0

16 Functions

Rating   Name   Duplication   Size   Complexity  
A timeseries_per_wz() 0 18 4
B insert_society_data() 0 48 5
B insert_timeseries_per_wz() 0 79 3
A get_cached_tables() 0 14 3
A insert_cts_ind_wz_definitions() 0 42 3
C insert_cts_ind() 0 121 10
A insert_cts_ind_demands() 0 50 4
A create_tables() 0 20 1
A data_in_boundaries() 0 32 1
B adjust_ind_pes() 0 119 1
B disagg_households_power() 0 107 7
A insert_household_demand() 0 32 4
A adjust_cts_ind_nep() 0 40 2
A match_nuts3_bl() 0 26 1
B write_demandregio_hh_profiles_to_db() 0 58 5
B insert_hh_demand() 0 86 7

1 Method

Rating   Name   Duplication   Size   Complexity  
A DemandRegio.__init__() 0 12 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.demandregio often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""The central module containing all code dealing with importing and
2
adjusting data from demandRegio
3
4
"""
5
6
from pathlib import Path
7
import os
8
import zipfile
9
10
from sqlalchemy import ARRAY, Column, Float, ForeignKey, Integer, String
11
from sqlalchemy.ext.declarative import declarative_base
12
import numpy as np
13
import pandas as pd
14
15
from egon.data import db, logger
16
from egon.data.datasets import Dataset, wrapped_partial
17
from egon.data.datasets.scenario_parameters import (
18
    EgonScenario,
19
    get_sector_parameters,
20
)
21
from egon.data.datasets.zensus import download_and_check
22
import egon.data.config
23
import egon.data.datasets.scenario_parameters.parameters as scenario_parameters
24
25
try:
26
    from disaggregator import config, data, spatial, temporal
27
28
except ImportError as e:
29
    pass
30
31
# will be later imported from another file ###
32
Base = declarative_base()
33
34
35
class DemandRegio(Dataset):
36
    """
37
    Extract and adjust data from DemandRegio
38
39
    Demand data for the sectors households, CTS and industry are calculated
40
    using DemandRegio's diaggregator and input data. To bring the resulting
41
    data in line with other data used in eGon-data and the eGon project in
42
    general some data needed to be adjusted or extended, e.g. in function
43
    :py:func:`adjust_ind_pes` or function :py:func:`adjust_cts_ind_nep`. The
44
    resulting data is written into newly created tables.
45
46
    *Dependencies*
47
      * :py:class:`DataBundle <egon.data.datasets.data_bundle.DataBundle>`
48
      * :py:class:`ScenarioParameters <egon.data.datasets.scenario_parameters.ScenarioParameters>`
49
      * :py:class:`ZensusVg250 <egon.data.datasets.zensus_vg250.ZensusVg250>`
50
51
    *Resulting tables*
52
      * :py:class:`demand.egon_demandregio_hh <egon.data.datasets.demandregio.EgonDemandRegioHH>` is created and filled
53
      * :py:class:`demand.egon_demandregio_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioCtsInd>` is created and filled
54
      * :py:class:`society.egon_demandregio_population <egon.data.datasets.demandregio.EgonDemandRegioPopulation>` is created and filled
55
      * :py:class:`society.egon_demandregio_household <egon.data.datasets.demandregio.EgonDemandRegioHouseholds>` is created and filled
56
      * :py:class:`demand.egon_demandregio_wz <egon.data.datasets.demandregio.EgonDemandRegioWz>` is created and filled
57
      * :py:class:`demand.egon_demandregio_timeseries_cts_ind <egon.data.datasets.demandregio.EgonDemandRegioTimeseriesCtsInd>` is created and filled
58
59
    """
60
61
    #:
62
    name: str = "DemandRegio"
63
    #:
64
    version: str = "0.0.12"
65
66
    def __init__(self, dependencies):
67
        super().__init__(
68
            name=self.name,
69
            version=self.version,
70
            dependencies=dependencies,
71
            tasks=(
72
                get_cached_tables,  # adhoc workaround #180
73
                create_tables,
74
                {
75
                    insert_household_demand,
76
                    insert_society_data,
77
                    insert_cts_ind_demands,
78
                },
79
            ),
80
        )
81
82
83
class DemandRegioLoadProfiles(Base):
84
    __tablename__ = "demandregio_household_load_profiles"
85
    __table_args__ = {"schema": "demand"}
86
87
    id = Column(Integer, primary_key=True)
88
    year = Column(Integer)
89
    nuts3 = Column(String)
90
    load_in_mwh = Column(ARRAY(Float()))
91
92
93
class EgonDemandRegioHH(Base):
94
    __tablename__ = "egon_demandregio_hh"
95
    __table_args__ = {"schema": "demand"}
96
    nuts3 = Column(String(5), primary_key=True)
97
    hh_size = Column(Integer, primary_key=True)
98
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
99
    year = Column(Integer)
100
    demand = Column(Float)
101
102
103
class EgonDemandRegioCtsInd(Base):
104
    __tablename__ = "egon_demandregio_cts_ind"
105
    __table_args__ = {"schema": "demand"}
106
    nuts3 = Column(String(5), primary_key=True)
107
    wz = Column(Integer, primary_key=True)
108
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
109
    year = Column(Integer)
110
    demand = Column(Float)
111
112
113
class EgonDemandRegioPopulation(Base):
114
    __tablename__ = "egon_demandregio_population"
115
    __table_args__ = {"schema": "society"}
116
    nuts3 = Column(String(5), primary_key=True)
117
    year = Column(Integer, primary_key=True)
118
    population = Column(Float)
119
120
121
class EgonDemandRegioHouseholds(Base):
122
    __tablename__ = "egon_demandregio_household"
123
    __table_args__ = {"schema": "society"}
124
    nuts3 = Column(String(5), primary_key=True)
125
    hh_size = Column(Integer, primary_key=True)
126
    year = Column(Integer, primary_key=True)
127
    households = Column(Integer)
128
129
130
class EgonDemandRegioWz(Base):
131
    __tablename__ = "egon_demandregio_wz"
132
    __table_args__ = {"schema": "demand"}
133
    wz = Column(Integer, primary_key=True)
134
    sector = Column(String(50))
135
    definition = Column(String(150))
136
137
138
class EgonDemandRegioTimeseriesCtsInd(Base):
139
    __tablename__ = "egon_demandregio_timeseries_cts_ind"
140
    __table_args__ = {"schema": "demand"}
141
    wz = Column(Integer, primary_key=True)
142
    year = Column(Integer, primary_key=True)
143
    slp = Column(String(50))
144
    load_curve = Column(ARRAY(Float()))
145
146
147
def create_tables():
148
    """Create tables for demandregio data
149
    Returns
150
    -------
151
    None.
152
    """
153
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;")
154
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS society;")
155
    engine = db.engine()
156
    EgonDemandRegioHH.__table__.create(bind=engine, checkfirst=True)
157
    EgonDemandRegioCtsInd.__table__.create(bind=engine, checkfirst=True)
158
    EgonDemandRegioPopulation.__table__.create(bind=engine, checkfirst=True)
159
    EgonDemandRegioHouseholds.__table__.create(bind=engine, checkfirst=True)
160
    EgonDemandRegioWz.__table__.create(bind=engine, checkfirst=True)
161
    DemandRegioLoadProfiles.__table__.create(bind=db.engine(), checkfirst=True)
162
    EgonDemandRegioTimeseriesCtsInd.__table__.drop(
163
        bind=engine, checkfirst=True
164
    )
165
    EgonDemandRegioTimeseriesCtsInd.__table__.create(
166
        bind=engine, checkfirst=True
167
    )
168
169
170
def data_in_boundaries(df):
171
    """Select rows with nuts3 code within boundaries, used for testmode
172
173
    Parameters
174
    ----------
175
    df : pandas.DataFrame
176
        Data for all nuts3 regions
177
178
    Returns
179
    -------
180
    pandas.DataFrame
181
        Data for nuts3 regions within boundaries
182
183
    """
184
    engine = db.engine()
185
186
    df = df.reset_index()
187
188
    # Change nuts3 region names to 2016 version
189
    nuts_names = {"DEB16": "DEB1C", "DEB19": "DEB1D"}
190
    df.loc[df.nuts3.isin(nuts_names), "nuts3"] = df.loc[
191
        df.nuts3.isin(nuts_names), "nuts3"
192
    ].map(nuts_names)
193
194
    df = df.set_index("nuts3")
195
196
    return df[
197
        df.index.isin(
198
            pd.read_sql(
199
                "SELECT DISTINCT ON (nuts) nuts FROM boundaries.vg250_krs",
200
                engine,
201
            ).nuts
202
        )
203
    ]
204
205
206
def insert_cts_ind_wz_definitions():
207
    """Insert demandregio's definitions of CTS and industrial branches
208
209
    Returns
210
    -------
211
    None.
212
213
    """
214
215
    source = egon.data.config.datasets()["demandregio_cts_ind_demand"][
216
        "sources"
217
    ]
218
219
    target = egon.data.config.datasets()["demandregio_cts_ind_demand"][
220
        "targets"
221
    ]["wz_definitions"]
222
223
    engine = db.engine()
224
225
    for sector in source["wz_definitions"]:
226
        file_path = (
227
            Path(".")
228
            / "data_bundle_egon_data"
229
            / "WZ_definition"
230
            / source["wz_definitions"][sector]
231
        )
232
233
        if sector == "CTS":
234
            delimiter = ";"
235
        else:
236
            delimiter = ","
237
        df = (
238
            pd.read_csv(file_path, delimiter=delimiter, header=None)
239
            .rename({0: "wz", 1: "definition"}, axis="columns")
240
            .set_index("wz")
241
        )
242
        df["sector"] = sector
243
        df.to_sql(
244
            target["table"],
245
            engine,
246
            schema=target["schema"],
247
            if_exists="append",
248
        )
249
250
251
def match_nuts3_bl():
252
    """Function that maps the federal state to each nuts3 region
253
254
    Returns
255
    -------
256
    df : pandas.DataFrame
257
        List of nuts3 regions and the federal state of Germany.
258
259
    """
260
261
    engine = db.engine()
262
263
    df = pd.read_sql(
264
        "SELECT DISTINCT ON (boundaries.vg250_krs.nuts) "
265
        "boundaries.vg250_krs.nuts, boundaries.vg250_lan.gen "
266
        "FROM boundaries.vg250_lan, boundaries.vg250_krs "
267
        " WHERE ST_CONTAINS("
268
        "boundaries.vg250_lan.geometry, "
269
        "boundaries.vg250_krs.geometry)",
270
        con=engine,
271
    )
272
273
    df.gen[df.gen == "Baden-Württemberg (Bodensee)"] = "Baden-Württemberg"
274
    df.gen[df.gen == "Bayern (Bodensee)"] = "Bayern"
275
276
    return df.set_index("nuts")
277
278
279
def adjust_ind_pes(ec_cts_ind):
280
    """
281
    Adjust electricity demand of industrial consumers due to electrification
282
    of process heat based on assumptions of pypsa-eur-sec.
283
284
    Parameters
285
    ----------
286
    ec_cts_ind : pandas.DataFrame
287
        Industrial demand without additional electrification
288
289
    Returns
290
    -------
291
    ec_cts_ind : pandas.DataFrame
292
        Industrial demand with additional electrification
293
294
    """
295
296
    pes_path = (
297
        Path(".") / "data_bundle_powerd_data" / "pypsa_eur" / "resources"
298
    )
299
300
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
301
        "sources"
302
    ]["new_consumers_2050"]
303
304
    # Extract today's industrial demand from pypsa-eur-sec
305
    demand_today = pd.read_csv(
306
        pes_path / sources["pes-demand-today"],
307
        header=None,
308
    ).transpose()
309
310
    # Filter data
311
    demand_today[1].fillna("carrier", inplace=True)
312
    demand_today = demand_today[
313
        (demand_today[0] == "DE") | (demand_today[1] == "carrier")
314
    ].drop([0, 2], axis="columns")
315
316
    demand_today = (
317
        demand_today.transpose()
318
        .set_index(0)
319
        .transpose()
320
        .set_index("carrier")
321
        .transpose()
322
        .loc["electricity"]
323
        .astype(float)
324
    )
325
326
    # Calculate future industrial demand from pypsa-eur-sec
327
    # based on production and energy demands per carrier ('sector ratios')
328
    prod_tomorrow = pd.read_csv(pes_path / sources["pes-production-tomorrow"])
329
330
    prod_tomorrow = prod_tomorrow[prod_tomorrow["kton/a"] == "DE"].set_index(
331
        "kton/a"
332
    )
333
334
    sector_ratio = (
335
        pd.read_csv(pes_path / sources["pes-sector-ratios"])
336
        .set_index("MWh/tMaterial")
337
        .loc["elec"]
338
    )
339
340
    demand_tomorrow = prod_tomorrow.multiply(
341
        sector_ratio.div(1000)
342
    ).transpose()["DE"]
343
344
    # Calculate changes of electrical demand per sector in pypsa-eur-sec
345
    change = pd.DataFrame(
346
        (demand_tomorrow / demand_today)
347
        / (demand_tomorrow / demand_today).sum()
348
    )
349
350
    # Drop rows without changes
351
    change = change[~change[0].isnull()]
352
353
    # Map industrial branches of pypsa-eur-sec to WZ2008 used in demandregio
354
    change["wz"] = change.index.map(
355
        {
356
            "Alumina production": 24,
357
            "Aluminium - primary production": 24,
358
            "Aluminium - secondary production": 24,
359
            "Ammonia": 20,
360
            "Basic chemicals (without ammonia)": 20,
361
            "Cement": 23,
362
            "Ceramics & other NMM": 23,
363
            "Electric arc": 24,
364
            "Food, beverages and tobacco": 10,
365
            "Glass production": 23,
366
            "Integrated steelworks": 24,
367
            "Machinery Equipment": 28,
368
            "Other Industrial Sectors": 32,
369
            "Other chemicals": 20,
370
            "Other non-ferrous metals": 24,
371
            "Paper production": 17,
372
            "Pharmaceutical products etc.": 21,
373
            "Printing and media reproduction": 18,
374
            "Pulp production": 17,
375
            "Textiles and leather": 13,
376
            "Transport Equipment": 29,
377
            "Wood and wood products": 16,
378
        }
379
    )
380
381
    # Group by WZ2008
382
    shares_per_wz = change.groupby("wz")[0].sum()
383
384
    # Calculate addtional demands needed to meet future demand of pypsa-eur-sec
385
    addtional_mwh = shares_per_wz.multiply(
386
        demand_tomorrow.sum() * 1000000 - ec_cts_ind.sum().sum()
387
    )
388
389
    # Calulate overall industrial demand for eGon100RE
390
    final_mwh = addtional_mwh + ec_cts_ind[addtional_mwh.index].sum()
391
392
    # Linear scale the industrial demands per nuts3 and wz to meet final demand
393
    ec_cts_ind[addtional_mwh.index] *= (
394
        final_mwh / ec_cts_ind[addtional_mwh.index].sum()
395
    )
396
397
    return ec_cts_ind
398
399
400
def adjust_cts_ind_nep(ec_cts_ind, sector):
401
    """Add electrical demand of new largescale CTS und industrial consumers
402
    according to NEP 2021, scneario C 2035. Values per federal state are
403
    linear distributed over all CTS branches and nuts3 regions.
404
405
    Parameters
406
    ----------
407
    ec_cts_ind : pandas.DataFrame
408
        CTS or industry demand without new largescale consumers.
409
410
    Returns
411
    -------
412
    ec_cts_ind : pandas.DataFrame
413
        CTS or industry demand including new largescale consumers.
414
415
    """
416
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
417
        "sources"
418
    ]
419
420
    file_path = (
421
        Path(".")
422
        / "data_bundle_egon_data"
423
        / "nep2035_version2021"
424
        / sources["new_consumers_2035"]
425
    )
426
427
    # get data from NEP per federal state
428
    new_con = pd.read_csv(file_path, delimiter=";", decimal=",", index_col=0)
429
430
    # match nuts3 regions to federal states
431
    groups = ec_cts_ind.groupby(match_nuts3_bl().gen)
432
433
    # update demands per federal state
434
    for group in groups.indices.keys():
435
        g = groups.get_group(group)
436
        data_new = g.mul(1 + new_con[sector][group] * 1e6 / g.sum().sum())
437
        ec_cts_ind[ec_cts_ind.index.isin(g.index)] = data_new
438
439
    return ec_cts_ind
440
441
442
def disagg_households_power(
443
    scenario, year, weight_by_income=False, original=False, **kwargs
444
):
445
    """
446
    Perform spatial disaggregation of electric power in [GWh/a] by key and
447
    possibly weight by income.
448
    Similar to disaggregator.spatial.disagg_households_power
449
450
451
    Parameters
452
    ----------
453
    by : str
454
        must be one of ['households', 'population']
455
    weight_by_income : bool, optional
456
        Flag if to weight the results by the regional income (default False)
457
    orignal : bool, optional
458
        Throughput to function households_per_size,
459
        A flag if the results should be left untouched and returned in
460
        original form for the year 2011 (True) or if they should be scaled to
461
        the given `year` by the population in that year (False).
462
463
    Returns
464
    -------
465
    pd.DataFrame or pd.Series
466
    """
467
    # source: survey of energieAgenturNRW
468
    # with/without direct water heating (DHW), and weighted average
469
    # https://1-stromvergleich.com/wp-content/uploads/erhebung_wo_bleibt_der_strom.pdf
470
    demand_per_hh_size = pd.DataFrame(
471
        index=range(1, 7),
472
        data={
473
            # "weighted DWH": [2290, 3202, 4193, 4955, 5928, 5928],
474
            # "without DHW": [1714, 2812, 3704, 4432, 5317, 5317],
475
            "with_DHW": [2181, 3843, 5151, 6189, 7494, 8465],
476
            "without_DHW": [1798, 2850, 3733, 4480, 5311, 5816],
477
            "weighted": [2256, 3248, 4246, 5009, 5969, 6579],
478
        },
479
    )
480
481
    if scenario == "eGon100RE":
482
        # chose demand per household size from survey without DHW
483
        power_per_HH = (
484
            demand_per_hh_size["without_DHW"] / 1e3
485
        )  # TODO why without?
486
487
        # calculate demand per nuts3 in 2011
488
        df_2011 = data.households_per_size(year=2011) * power_per_HH
489
490
        # scale demand per hh-size to meet demand without heat
491
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
492
        # TODO check source and method
493
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
494
495
        # calculate demand per nuts3 in 2050
496
        df = data.households_per_size(year=year) * power_per_HH
497
498
    # Bottom-Up: Power demand by household sizes in [MWh/a] for each scenario
499
    elif scenario in ["status2019", "status2023", "eGon2021", "eGon2035"]:
500
        # chose demand per household size from survey including weighted DHW
501
        power_per_HH = demand_per_hh_size["weighted"] / 1e3
502
503
        # calculate demand per nuts3
504
        df = (
505
            data.households_per_size(original=original, year=year)
506
            * power_per_HH
507
        )
508
509
        if scenario == "eGon2035":
510
            # scale to fit demand of NEP 2021 scebario C 2035 (119TWh)
511
            df *= 119 * 1e6 / df.sum().sum()
512
513
        if scenario == "status2023":
514
            # scale to fit demand of BDEW 2023 (130.48 TWh) see issue #180
515
            df *= 130.48 * 1e6 / df.sum().sum()
516
517
        # if scenario == "status2021": # TODO status2021
518
        #     # scale to fit demand of AGEB 2021 (138.6 TWh)
519
        #     # https://ag-energiebilanzen.de/wp-content/uploads/2023/01/AGEB_22p2_rev-1.pdf#page=10
520
        #     df *= 138.6 * 1e6 / df.sum().sum()
521
522
    elif scenario == "eGon100RE":
523
        # chose demand per household size from survey without DHW
524
        power_per_HH = demand_per_hh_size["without DHW"] / 1e3
525
526
        # calculate demand per nuts3 in 2011
527
        df_2011 = data.households_per_size(year=2011) * power_per_HH
528
529
        # scale demand per hh-size to meet demand without heat
530
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
531
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
532
533
        # calculate demand per nuts3 in 2050
534
        df = data.households_per_size(year=year) * power_per_HH
535
536
        # scale to meet annual demand from NEP 2023, scenario B 2045
537
        df *= 90400000 / df.sum().sum()
538
539
    else:
540
        print(
541
            f"Electric demand per household size for scenario {scenario} "
542
            "is not specified."
543
        )
544
545
    if weight_by_income:
546
        df = spatial.adjust_by_income(df=df)
0 ignored issues
show
introduced by
The variable df does not seem to be defined for all execution paths.
Loading history...
547
548
    return df
549
550
551
def write_demandregio_hh_profiles_to_db(hh_profiles):
552
    """Write HH demand profiles from demand regio into db. One row per
553
    year and nuts3. The annual load profile timeseries is an array.
554
555
    schema: demand
556
    tablename: demandregio_household_load_profiles
557
558
559
560
    Parameters
561
    ----------
562
    hh_profiles: pd.DataFrame
563
564
    Returns
565
    -------
566
    """
567
    years = hh_profiles.index.year.unique().values
568
    df_to_db = pd.DataFrame(
569
        columns=["id", "year", "nuts3", "load_in_mwh"]
570
    ).set_index("id")
571
    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]
572
573
    if dataset == "Schleswig-Holstein":
574
        hh_profiles = hh_profiles.loc[
575
            :, hh_profiles.columns.str.contains("DEF0")
576
        ]
577
578
    idx = pd.read_sql_query(
579
        f"""
580
                           SELECT MAX(id)
581
                           FROM {DemandRegioLoadProfiles.__table__.schema}.
582
                           {DemandRegioLoadProfiles.__table__.name}
583
                           """,
584
        con=db.engine(),
585
    ).iat[0, 0]
586
587
    idx = 0 if idx is None else idx + 1
588
589
    for year in years:
590
        df = hh_profiles[hh_profiles.index.year == year]
591
592
        for nuts3 in hh_profiles.columns:
593
            idx += 1
594
            df_to_db.at[idx, "year"] = year
595
            df_to_db.at[idx, "nuts3"] = nuts3
596
            df_to_db.at[idx, "load_in_mwh"] = df[nuts3].to_list()
597
598
    df_to_db["year"] = df_to_db["year"].apply(int)
599
    df_to_db["nuts3"] = df_to_db["nuts3"].astype(str)
600
    df_to_db["load_in_mwh"] = df_to_db["load_in_mwh"].apply(list)
601
    df_to_db = df_to_db.reset_index()
602
603
    df_to_db.to_sql(
604
        name=DemandRegioLoadProfiles.__table__.name,
605
        schema=DemandRegioLoadProfiles.__table__.schema,
606
        con=db.engine(),
607
        if_exists="append",
608
        index=-False,
609
    )
610
611
612
def insert_hh_demand(scenario, year, engine):
613
    """Calculates electrical demands of private households using demandregio's
614
    disaggregator and insert results into the database.
615
616
    Parameters
617
    ----------
618
    scenario : str
619
        Name of the corresponding scenario.
620
    year : int
621
        The number of households per region is taken from this year.
622
623
    Returns
624
    -------
625
    None.
626
627
    """
628
    targets = egon.data.config.datasets()["demandregio_household_demand"][
629
        "targets"
630
    ]["household_demand"]
631
    # get demands of private households per nuts and size from demandregio
632
    ec_hh = disagg_households_power(scenario, year)
633
634
    # Select demands for nuts3-regions in boundaries (needed for testmode)
635
    ec_hh = data_in_boundaries(ec_hh)
636
637
    # insert into database
638
    for hh_size in ec_hh.columns:
639
        df = pd.DataFrame(ec_hh[hh_size])
640
        df["year"] = (
641
            2023 if scenario == "status2023" else year
642
        )  # TODO status2023
643
        # adhoc fix until ffeopendata servers are up and population_year can be set
644
645
        df["scenario"] = scenario
646
        df["hh_size"] = hh_size
647
        df = df.rename({hh_size: "demand"}, axis="columns")
648
        df.to_sql(
649
            targets["table"],
650
            engine,
651
            schema=targets["schema"],
652
            if_exists="append",
653
        )
654
655
    # insert housholds demand timeseries
656
    try:
657
        hh_load_timeseries = (
658
            temporal.disagg_temporal_power_housholds_slp(
659
                use_nuts3code=True,
660
                by="households",
661
                weight_by_income=False,
662
                year=year,
663
            )
664
            .resample("h")
665
            .sum()
666
        )
667
        hh_load_timeseries.rename(
668
            columns={"DEB16": "DEB1C", "DEB19": "DEB1D"}, inplace=True
669
        )
670
    except Exception as e:
671
        logger.warning(
672
            f"Couldnt get profiles from FFE, will use pickeld fallback! \n {e}"
673
        )
674
        hh_load_timeseries = pd.read_csv(
675
            "data_bundle_egon_data/demand_regio_backup/df_load_profiles.csv",
676
            index_col="time",
677
        )
678
        hh_load_timeseries.index = pd.to_datetime(
679
            hh_load_timeseries.index, format="%Y-%m-%d %H:%M:%S"
680
        )
681
682
        def change_year(dt, year):
683
            return dt.replace(year=year)
684
685
        year = 2023 if scenario == "status2023" else year  # TODO status2023
686
        hh_load_timeseries.index = hh_load_timeseries.index.map(
687
            lambda dt: change_year(dt, year)
0 ignored issues
show
introduced by
The variable change_year does not seem to be defined for all execution paths.
Loading history...
688
        )
689
690
        if scenario == "status2023":
691
            hh_load_timeseries = hh_load_timeseries.shift(24 * 2)
692
693
            hh_load_timeseries.iloc[: 24 * 7] = hh_load_timeseries.iloc[
694
                24 * 7 : 24 * 7 * 2
695
            ].values
696
697
    write_demandregio_hh_profiles_to_db(hh_load_timeseries)
698
699
700
def insert_cts_ind(scenario, year, engine, target_values):
701
    """Calculates electrical demands of CTS and industry using demandregio's
702
    disaggregator, adjusts them according to resulting values of NEP 2021 or
703
    JRC IDEES and insert results into the database.
704
705
    Parameters
706
    ----------
707
    scenario : str
708
        Name of the corresponing scenario.
709
    year : int
710
        The number of households per region is taken from this year.
711
    target_values : dict
712
        List of target values for each scenario and sector.
713
714
    Returns
715
    -------
716
    None.
717
718
    """
719
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
720
        "targets"
721
    ]
722
723
    wz_table = pd.read_sql(
724
        "SELECT wz, sector FROM demand.egon_demandregio_wz",
725
        con=engine,
726
        index_col="wz",
727
    )
728
729
    # Workaround: Since the disaggregator does not work anymore, data from
730
    # previous runs is used for eGon2035 and eGon100RE
731
    if scenario == "eGon2035":
732
        file2035_path = (
733
            Path(".")
734
            / "data_bundle_egon_data"
735
            / "demand_regio_backup"
736
            / "egon_demandregio_cts_ind_egon2035.csv"
737
        )
738
        ec_cts_ind2 = pd.read_csv(file2035_path)
739
        ec_cts_ind2.to_sql(
740
            targets["cts_ind_demand"]["table"],
741
            engine,
742
            targets["cts_ind_demand"]["schema"],
743
            if_exists="append",
744
            index=False,
745
        )
746
        return
747
748
    if scenario == "eGon100RE":
749
        ec_cts_ind2 = pd.read_csv(
750
            "data_bundle_egon_data/demand_regio_backup/egon_demandregio_cts_ind.csv"
751
        )
752
        ec_cts_ind2["sector"] = ec_cts_ind2["wz"].map(wz_table["sector"])
753
        factor_ind = target_values[scenario]["industry"] / (
754
            ec_cts_ind2[ec_cts_ind2["sector"] == "industry"]["demand"].sum()
755
            / 1000
756
        )
757
        factor_cts = target_values[scenario]["CTS"] / (
758
            ec_cts_ind2[ec_cts_ind2["sector"] == "CTS"]["demand"].sum() / 1000
759
        )
760
761
        ec_cts_ind2["demand"] = ec_cts_ind2.apply(
762
            lambda x: (
763
                x["demand"] * factor_ind
0 ignored issues
show
introduced by
The variable factor_ind does not seem to be defined for all execution paths.
Loading history...
764
                if x["sector"] == "industry"
765
                else x["demand"] * factor_cts
0 ignored issues
show
introduced by
The variable factor_cts does not seem to be defined for all execution paths.
Loading history...
766
            ),
767
            axis=1,
768
        )
769
770
        ec_cts_ind2.drop(columns=["sector"], inplace=True)
771
772
        ec_cts_ind2.to_sql(
773
            targets["cts_ind_demand"]["table"],
774
            engine,
775
            targets["cts_ind_demand"]["schema"],
776
            if_exists="append",
777
            index=False,
778
        )
779
        return
780
781
    for sector in ["CTS", "industry"]:
782
        # get demands per nuts3 and wz of demandregio
783
        ec_cts_ind = spatial.disagg_CTS_industry(
784
            use_nuts3code=True, source="power", sector=sector, year=year
785
        ).transpose()
786
787
        ec_cts_ind.index = ec_cts_ind.index.rename("nuts3")
788
789
        # exclude mobility sector from GHD
790
        ec_cts_ind = ec_cts_ind.drop(columns=49, errors="ignore")
791
792
        # scale values according to target_values
793
        if sector in target_values[scenario].keys():
794
            ec_cts_ind *= (
795
                target_values[scenario][sector] * 1e3 / ec_cts_ind.sum().sum()
796
            )
797
798
        # include new largescale consumers according to NEP 2021
799
        if scenario == "eGon2035":
800
            ec_cts_ind = adjust_cts_ind_nep(ec_cts_ind, sector)
801
        # include new industrial demands due to sector coupling
802
        if (scenario == "eGon100RE") & (sector == "industry"):
803
            ec_cts_ind = adjust_ind_pes(ec_cts_ind)
804
805
        # Select demands for nuts3-regions in boundaries (needed for testmode)
806
        ec_cts_ind = data_in_boundaries(ec_cts_ind)
807
808
        # insert into database
809
        for wz in ec_cts_ind.columns:
810
            df = pd.DataFrame(ec_cts_ind[wz])
811
            df["year"] = year
812
            df["wz"] = wz
813
            df["scenario"] = scenario
814
            df = df.rename({wz: "demand"}, axis="columns")
815
            df.index = df.index.rename("nuts3")
816
            df.to_sql(
817
                targets["cts_ind_demand"]["table"],
818
                engine,
819
                targets["cts_ind_demand"]["schema"],
820
                if_exists="append",
821
            )
822
823
824
def insert_household_demand():
825
    """Insert electrical demands for households according to
826
    demandregio using its disaggregator-tool in MWh
827
828
    Returns
829
    -------
830
    None.
831
832
    """
833
    targets = egon.data.config.datasets()["demandregio_household_demand"][
834
        "targets"
835
    ]
836
    engine = db.engine()
837
838
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
839
840
    scenarios.append("eGon2021")
841
842
    for t in targets:
843
        db.execute_sql(
844
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
845
        )
846
847
    for scn in scenarios:
848
        year = (
849
            2023
850
            if scn == "status2023"
851
            else scenario_parameters.global_settings(scn)["population_year"]
852
        )
853
854
        # Insert demands of private households
855
        insert_hh_demand(scn, year, engine)
856
857
858
def insert_cts_ind_demands():
859
    """Insert electricity demands per nuts3-region in Germany according to
860
    demandregio using its disaggregator-tool in MWh
861
862
    Returns
863
    -------
864
    None.
865
866
    """
867
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
868
        "targets"
869
    ]
870
    engine = db.engine()
871
872
    for t in targets:
873
        db.execute_sql(
874
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
875
        )
876
877
    insert_cts_ind_wz_definitions()
878
879
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
880
881
    scenarios.append("eGon2021")
882
883
    for scn in scenarios:
884
        year = scenario_parameters.global_settings(scn)["population_year"]
885
886
        if year > 2035:
887
            year = 2035
888
889
        # target values per scenario in MWh
890
        target_values = {
891
            # according to NEP 2021
892
            # new consumers will be added seperatly
893
            "eGon2035": {"CTS": 135300, "industry": 225400},
894
            # CTS: reduce overall demand from demandregio (without traffic)
895
            # by share of heat according to JRC IDEES, data from 2011
896
            # industry: no specific heat demand, use data from demandregio
897
            "eGon100RE": {"CTS": 146700, "industry": 382900},
898
            # no adjustments for status quo
899
            "eGon2021": {},
900
            "status2019": {},
901
            "status2023": {"CTS": 121160 * 1e3, "industry": 200380 * 1e3},
902
        }
903
904
        insert_cts_ind(scn, year, engine, target_values)
905
906
    # Insert load curves per wz
907
    timeseries_per_wz()
908
909
910
def insert_society_data():
911
    """Insert population and number of households per nuts3-region in Germany
912
    according to demandregio using its disaggregator-tool
913
914
    Returns
915
    -------
916
    None.
917
918
    """
919
    targets = egon.data.config.datasets()["demandregio_society"]["targets"]
920
    engine = db.engine()
921
922
    for t in targets:
923
        db.execute_sql(
924
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
925
        )
926
927
    target_years = np.append(
928
        get_sector_parameters("global").population_year.values, 2018
929
    )
930
931
    for year in target_years:
932
        df_pop = pd.DataFrame(data.population(year=year))
933
        df_pop["year"] = year
934
        df_pop = df_pop.rename({"value": "population"}, axis="columns")
935
        # Select data for nuts3-regions in boundaries (needed for testmode)
936
        df_pop = data_in_boundaries(df_pop)
937
        df_pop.to_sql(
938
            targets["population"]["table"],
939
            engine,
940
            schema=targets["population"]["schema"],
941
            if_exists="append",
942
        )
943
944
    for year in target_years:
945
        df_hh = pd.DataFrame(data.households_per_size(year=year))
946
        # Select data for nuts3-regions in boundaries (needed for testmode)
947
        df_hh = data_in_boundaries(df_hh)
948
        for hh_size in df_hh.columns:
949
            df = pd.DataFrame(df_hh[hh_size])
950
            df["year"] = year
951
            df["hh_size"] = hh_size
952
            df = df.rename({hh_size: "households"}, axis="columns")
953
            df.to_sql(
954
                targets["household"]["table"],
955
                engine,
956
                schema=targets["household"]["schema"],
957
                if_exists="append",
958
            )
959
960
961
def insert_timeseries_per_wz(sector, year):
962
    """Insert normalized electrical load time series for the selected sector
963
964
    Parameters
965
    ----------
966
    sector : str
967
        Name of the sector. ['CTS', 'industry']
968
    year : int
969
        Selected weather year
970
971
    Returns
972
    -------
973
    None.
974
975
    """
976
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
977
        "targets"
978
    ]
979
980
    if sector == "CTS":
981
        profiles = (
982
            data.CTS_power_slp_generator("SH", year=year)
983
            .drop(
984
                [
985
                    "Day",
986
                    "Hour",
987
                    "DayOfYear",
988
                    "WD",
989
                    "SA",
990
                    "SU",
991
                    "WIZ",
992
                    "SOZ",
993
                    "UEZ",
994
                ],
995
                axis="columns",
996
            )
997
            .resample("H")
998
            .sum()
999
        )
1000
        wz_slp = config.slp_branch_cts_power()
1001
    elif sector == "industry":
1002
        profiles = (
1003
            data.shift_load_profile_generator(state="SH", year=year)
1004
            .resample("H")
1005
            .sum()
1006
        )
1007
        wz_slp = config.shift_profile_industry()
1008
1009
    else:
1010
        print(f"Sector {sector} is not valid.")
1011
1012
    df = pd.DataFrame(
1013
        index=wz_slp.keys(), columns=["slp", "load_curve", "year"]
0 ignored issues
show
introduced by
The variable wz_slp does not seem to be defined for all execution paths.
Loading history...
1014
    )
1015
1016
    df.index.rename("wz", inplace=True)
1017
1018
    df.slp = wz_slp.values()
1019
1020
    df.year = year
1021
1022
    df.load_curve = profiles[df.slp].transpose().values.tolist()
0 ignored issues
show
introduced by
The variable profiles does not seem to be defined for all execution paths.
Loading history...
1023
1024
    db.execute_sql(
1025
        f"""
1026
                   DELETE FROM {targets['timeseries_cts_ind']['schema']}.
1027
                   {targets['timeseries_cts_ind']['table']}
1028
                   WHERE wz IN (
1029
                       SELECT wz FROM {targets['wz_definitions']['schema']}.
1030
                       {targets['wz_definitions']['table']}
1031
                       WHERE sector = '{sector}')
1032
                   """
1033
    )
1034
1035
    df.to_sql(
1036
        targets["timeseries_cts_ind"]["table"],
1037
        schema=targets["timeseries_cts_ind"]["schema"],
1038
        con=db.engine(),
1039
        if_exists="append",
1040
    )
1041
1042
1043
def timeseries_per_wz():
1044
    """Calcultae and insert normalized timeseries per wz for cts and industry
1045
1046
    Returns
1047
    -------
1048
    None.
1049
1050
    """
1051
1052
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
1053
    year_already_in_database = []
1054
    for scn in scenarios:
1055
        year = int(scenario_parameters.global_settings(scn)["weather_year"])
1056
1057
        for sector in ["CTS", "industry"]:
1058
            if not year in year_already_in_database:
1059
                insert_timeseries_per_wz(sector, int(year))
1060
        year_already_in_database.append(year)
1061
1062
1063
def get_cached_tables():
1064
    """Get cached demandregio tables and db-dump from former runs"""
1065
    data_config = egon.data.config.datasets()
1066
    for s in ["cache", "dbdump"]:
1067
        source_path = data_config["demandregio_workaround"]["source"][s][
1068
            "path"
1069
        ]
1070
        target_path = Path(
1071
            ".", data_config["demandregio_workaround"]["targets"][s]["path"]
1072
        )
1073
        os.makedirs(target_path, exist_ok=True)
1074
1075
        with zipfile.ZipFile(source_path, "r") as zip_ref:
1076
            zip_ref.extractall(path=target_path)
1077