Passed
Pull Request — dev (#1170)
by
unknown
05:05
created

data.datasets.demandregio   C

Complexity

Total Complexity 54

Size/Duplication

Total Lines 946
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 54
eloc 480
dl 0
loc 946
rs 6.4799
c 0
b 0
f 0

16 Functions

Rating   Name   Duplication   Size   Complexity  
A insert_cts_ind_wz_definitions() 0 42 3
B insert_cts_ind() 0 64 6
A insert_cts_ind_demands() 0 57 4
A create_tables() 0 19 1
A data_in_boundaries() 0 32 1
B adjust_ind_pes() 0 119 1
B disagg_households_power() 0 90 6
A insert_household_demand() 0 26 3
A timeseries_per_wz() 0 16 3
A adjust_cts_ind_nep() 0 40 2
A match_nuts3_bl() 0 26 1
B insert_society_data() 0 48 5
A write_demandregio_hh_profiles_to_db() 0 50 4
B insert_timeseries_per_wz() 0 79 3
B insert_hh_demand() 0 71 7
A get_cached_tables() 0 13 3

1 Method

Rating   Name   Duplication   Size   Complexity  
A DemandRegio.__init__() 0 13 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.demandregio often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""The central module containing all code dealing with importing and
2
adjusting data from demandRegio
3
4
"""
5
from pathlib import Path
6
import subprocess
7
import os
8
import zipfile
9
10
from sqlalchemy import ARRAY, Column, Float, ForeignKey, Integer, String
11
from sqlalchemy.ext.declarative import declarative_base
12
import numpy as np
13
import pandas as pd
14
15
from egon.data import db, logger
16
from egon.data.datasets import Dataset, wrapped_partial
17
from egon.data.datasets.demandregio.install_disaggregator import (
18
    clone_and_install,
19
)
20
from egon.data.datasets.scenario_parameters import (
21
    EgonScenario,
22
    get_sector_parameters,
23
)
24
from egon.data.datasets.zensus import download_and_check
25
import egon.data.config
26
import egon.data.datasets.scenario_parameters.parameters as scenario_parameters
27
28
try:
29
    from disaggregator import config, data, spatial, temporal
30
31
except ImportError as e:
32
    pass
33
34
# will be later imported from another file ###
35
Base = declarative_base()
36
37
38
class DemandRegio(Dataset):
39
    def __init__(self, dependencies):
40
        super().__init__(
41
            name="DemandRegio",
42
            version="0.0.9",
43
            dependencies=dependencies,
44
            tasks=(
45
                clone_and_install,
46
                get_cached_tables,  # adhoc workaround #180
47
                create_tables,
48
                {
49
                    insert_household_demand,
50
                    insert_society_data,
51
                    insert_cts_ind_demands,
52
                },
53
            ),
54
        )
55
56
class DemandRegioLoadProfiles(Base):
57
    __tablename__ = "demandregio_household_load_profiles"
58
    __table_args__ = {"schema": "demand"}
59
60
    id = Column(Integer, primary_key=True)
61
    year = Column(Integer)
62
    nuts3 = Column(String)
63
    load_in_mwh = Column(ARRAY(Float()))
64
65
class EgonDemandRegioHH(Base):
66
    __tablename__ = "egon_demandregio_hh"
67
    __table_args__ = {"schema": "demand"}
68
    nuts3 = Column(String(5), primary_key=True)
69
    hh_size = Column(Integer, primary_key=True)
70
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
71
    year = Column(Integer)
72
    demand = Column(Float)
73
74
75
class EgonDemandRegioCtsInd(Base):
76
    __tablename__ = "egon_demandregio_cts_ind"
77
    __table_args__ = {"schema": "demand"}
78
    nuts3 = Column(String(5), primary_key=True)
79
    wz = Column(Integer, primary_key=True)
80
    scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True)
81
    year = Column(Integer)
82
    demand = Column(Float)
83
84
85
class EgonDemandRegioPopulation(Base):
86
    __tablename__ = "egon_demandregio_population"
87
    __table_args__ = {"schema": "society"}
88
    nuts3 = Column(String(5), primary_key=True)
89
    year = Column(Integer, primary_key=True)
90
    population = Column(Float)
91
92
93
class EgonDemandRegioHouseholds(Base):
94
    __tablename__ = "egon_demandregio_household"
95
    __table_args__ = {"schema": "society"}
96
    nuts3 = Column(String(5), primary_key=True)
97
    hh_size = Column(Integer, primary_key=True)
98
    year = Column(Integer, primary_key=True)
99
    households = Column(Integer)
100
101
102
class EgonDemandRegioWz(Base):
103
    __tablename__ = "egon_demandregio_wz"
104
    __table_args__ = {"schema": "demand"}
105
    wz = Column(Integer, primary_key=True)
106
    sector = Column(String(50))
107
    definition = Column(String(150))
108
109
110
class EgonDemandRegioTimeseriesCtsInd(Base):
111
    __tablename__ = "egon_demandregio_timeseries_cts_ind"
112
    __table_args__ = {"schema": "demand"}
113
    wz = Column(Integer, primary_key=True)
114
    year = Column(Integer, primary_key=True)
115
    slp = Column(String(50))
116
    load_curve = Column(ARRAY(Float()))
117
118
119
def create_tables():
120
    """Create tables for demandregio data
121
    Returns
122
    -------
123
    None.
124
    """
125
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;")
126
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS society;")
127
    engine = db.engine()
128
    EgonDemandRegioHH.__table__.create(bind=engine, checkfirst=True)
129
    EgonDemandRegioCtsInd.__table__.create(bind=engine, checkfirst=True)
130
    EgonDemandRegioPopulation.__table__.create(bind=engine, checkfirst=True)
131
    EgonDemandRegioHouseholds.__table__.create(bind=engine, checkfirst=True)
132
    EgonDemandRegioWz.__table__.create(bind=engine, checkfirst=True)
133
    EgonDemandRegioTimeseriesCtsInd.__table__.drop(
134
        bind=engine, checkfirst=True
135
    )
136
    EgonDemandRegioTimeseriesCtsInd.__table__.create(
137
        bind=engine, checkfirst=True
138
    )
139
140
141
def data_in_boundaries(df):
142
    """Select rows with nuts3 code within boundaries, used for testmode
143
144
    Parameters
145
    ----------
146
    df : pandas.DataFrame
147
        Data for all nuts3 regions
148
149
    Returns
150
    -------
151
    pandas.DataFrame
152
        Data for nuts3 regions within boundaries
153
154
    """
155
    engine = db.engine()
156
157
    df = df.reset_index()
158
159
    # Change nuts3 region names to 2016 version
160
    nuts_names = {"DEB16": "DEB1C", "DEB19": "DEB1D"}
161
    df.loc[df.nuts3.isin(nuts_names), "nuts3"] = df.loc[
162
        df.nuts3.isin(nuts_names), "nuts3"
163
    ].map(nuts_names)
164
165
    df = df.set_index("nuts3")
166
167
    return df[
168
        df.index.isin(
169
            pd.read_sql(
170
                "SELECT DISTINCT ON (nuts) nuts FROM boundaries.vg250_krs",
171
                engine,
172
            ).nuts
173
        )
174
    ]
175
176
177
def insert_cts_ind_wz_definitions():
178
    """Insert demandregio's definitions of CTS and industrial branches
179
180
    Returns
181
    -------
182
    None.
183
184
    """
185
186
    source = egon.data.config.datasets()["demandregio_cts_ind_demand"][
187
        "sources"
188
    ]
189
190
    target = egon.data.config.datasets()["demandregio_cts_ind_demand"][
191
        "targets"
192
    ]["wz_definitions"]
193
194
    engine = db.engine()
195
196
    for sector in source["wz_definitions"]:
197
        file_path = (
198
            Path(".")
199
            / "data_bundle_egon_data"
200
            / "WZ_definition"
201
            / source["wz_definitions"][sector]
202
        )
203
204
        if sector == "CTS":
205
            delimiter = ";"
206
        else:
207
            delimiter = ","
208
        df = (
209
            pd.read_csv(file_path, delimiter=delimiter, header=None)
210
            .rename({0: "wz", 1: "definition"}, axis="columns")
211
            .set_index("wz")
212
        )
213
        df["sector"] = sector
214
        df.to_sql(
215
            target["table"],
216
            engine,
217
            schema=target["schema"],
218
            if_exists="append",
219
        )
220
221
222
def match_nuts3_bl():
223
    """Function that maps the federal state to each nuts3 region
224
225
    Returns
226
    -------
227
    df : pandas.DataFrame
228
        List of nuts3 regions and the federal state of Germany.
229
230
    """
231
232
    engine = db.engine()
233
234
    df = pd.read_sql(
235
        "SELECT DISTINCT ON (boundaries.vg250_krs.nuts) "
236
        "boundaries.vg250_krs.nuts, boundaries.vg250_lan.gen "
237
        "FROM boundaries.vg250_lan, boundaries.vg250_krs "
238
        " WHERE ST_CONTAINS("
239
        "boundaries.vg250_lan.geometry, "
240
        "boundaries.vg250_krs.geometry)",
241
        con=engine,
242
    )
243
244
    df.gen[df.gen == "Baden-Württemberg (Bodensee)"] = "Baden-Württemberg"
245
    df.gen[df.gen == "Bayern (Bodensee)"] = "Bayern"
246
247
    return df.set_index("nuts")
248
249
250
def adjust_ind_pes(ec_cts_ind):
251
    """
252
    Adjust electricity demand of industrial consumers due to electrification
253
    of process heat based on assumptions of pypsa-eur-sec.
254
255
    Parameters
256
    ----------
257
    ec_cts_ind : pandas.DataFrame
258
        Industrial demand without additional electrification
259
260
    Returns
261
    -------
262
    ec_cts_ind : pandas.DataFrame
263
        Industrial demand with additional electrification
264
265
    """
266
267
    pes_path = (
268
        Path(".") / "data_bundle_egon_data" / "pypsa_eur_sec" / "resources"
269
    )
270
271
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
272
        "sources"
273
    ]["new_consumers_2050"]
274
275
    # Extract today's industrial demand from pypsa-eur-sec
276
    demand_today = pd.read_csv(
277
        pes_path / sources["pes-demand-today"],
278
        header=None,
279
    ).transpose()
280
281
    # Filter data
282
    demand_today[1].fillna("carrier", inplace=True)
283
    demand_today = demand_today[
284
        (demand_today[0] == "DE") | (demand_today[1] == "carrier")
285
    ].drop([0, 2], axis="columns")
286
287
    demand_today = (
288
        demand_today.transpose()
289
        .set_index(0)
290
        .transpose()
291
        .set_index("carrier")
292
        .transpose()
293
        .loc["electricity"]
294
        .astype(float)
295
    )
296
297
    # Calculate future industrial demand from pypsa-eur-sec
298
    # based on production and energy demands per carrier ('sector ratios')
299
    prod_tomorrow = pd.read_csv(pes_path / sources["pes-production-tomorrow"])
300
301
    prod_tomorrow = prod_tomorrow[prod_tomorrow["kton/a"] == "DE"].set_index(
302
        "kton/a"
303
    )
304
305
    sector_ratio = (
306
        pd.read_csv(pes_path / sources["pes-sector-ratios"])
307
        .set_index("MWh/tMaterial")
308
        .loc["elec"]
309
    )
310
311
    demand_tomorrow = prod_tomorrow.multiply(
312
        sector_ratio.div(1000)
313
    ).transpose()["DE"]
314
315
    # Calculate changes of electrical demand per sector in pypsa-eur-sec
316
    change = pd.DataFrame(
317
        (demand_tomorrow / demand_today)
318
        / (demand_tomorrow / demand_today).sum()
319
    )
320
321
    # Drop rows without changes
322
    change = change[~change[0].isnull()]
323
324
    # Map industrial branches of pypsa-eur-sec to WZ2008 used in demandregio
325
    change["wz"] = change.index.map(
326
        {
327
            "Alumina production": 24,
328
            "Aluminium - primary production": 24,
329
            "Aluminium - secondary production": 24,
330
            "Ammonia": 20,
331
            "Basic chemicals (without ammonia)": 20,
332
            "Cement": 23,
333
            "Ceramics & other NMM": 23,
334
            "Electric arc": 24,
335
            "Food, beverages and tobacco": 10,
336
            "Glass production": 23,
337
            "Integrated steelworks": 24,
338
            "Machinery Equipment": 28,
339
            "Other Industrial Sectors": 32,
340
            "Other chemicals": 20,
341
            "Other non-ferrous metals": 24,
342
            "Paper production": 17,
343
            "Pharmaceutical products etc.": 21,
344
            "Printing and media reproduction": 18,
345
            "Pulp production": 17,
346
            "Textiles and leather": 13,
347
            "Transport Equipment": 29,
348
            "Wood and wood products": 16,
349
        }
350
    )
351
352
    # Group by WZ2008
353
    shares_per_wz = change.groupby("wz")[0].sum()
354
355
    # Calculate addtional demands needed to meet future demand of pypsa-eur-sec
356
    addtional_mwh = shares_per_wz.multiply(
357
        demand_tomorrow.sum() * 1000000 - ec_cts_ind.sum().sum()
358
    )
359
360
    # Calulate overall industrial demand for eGon100RE
361
    final_mwh = addtional_mwh + ec_cts_ind[addtional_mwh.index].sum()
362
363
    # Linear scale the industrial demands per nuts3 and wz to meet final demand
364
    ec_cts_ind[addtional_mwh.index] *= (
365
        final_mwh / ec_cts_ind[addtional_mwh.index].sum()
366
    )
367
368
    return ec_cts_ind
369
370
371
def adjust_cts_ind_nep(ec_cts_ind, sector):
372
    """Add electrical demand of new largescale CTS und industrial consumers
373
    according to NEP 2021, scneario C 2035. Values per federal state are
374
    linear distributed over all CTS branches and nuts3 regions.
375
376
    Parameters
377
    ----------
378
    ec_cts_ind : pandas.DataFrame
379
        CTS or industry demand without new largescale consumers.
380
381
    Returns
382
    -------
383
    ec_cts_ind : pandas.DataFrame
384
        CTS or industry demand including new largescale consumers.
385
386
    """
387
    sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][
388
        "sources"
389
    ]
390
391
    file_path = (
392
        Path(".")
393
        / "data_bundle_egon_data"
394
        / "nep2035_version2021"
395
        / sources["new_consumers_2035"]
396
    )
397
398
    # get data from NEP per federal state
399
    new_con = pd.read_csv(file_path, delimiter=";", decimal=",", index_col=0)
400
401
    # match nuts3 regions to federal states
402
    groups = ec_cts_ind.groupby(match_nuts3_bl().gen)
403
404
    # update demands per federal state
405
    for group in groups.indices.keys():
406
        g = groups.get_group(group)
407
        data_new = g.mul(1 + new_con[sector][group] * 1e6 / g.sum().sum())
408
        ec_cts_ind[ec_cts_ind.index.isin(g.index)] = data_new
409
410
    return ec_cts_ind
411
412
413
def disagg_households_power(
414
    scenario, year, weight_by_income=False, original=False, **kwargs
415
):
416
    """
417
    Perform spatial disaggregation of electric power in [GWh/a] by key and
418
    possibly weight by income.
419
    Similar to disaggregator.spatial.disagg_households_power
420
421
422
    Parameters
423
    ----------
424
    by : str
425
        must be one of ['households', 'population']
426
    weight_by_income : bool, optional
427
        Flag if to weight the results by the regional income (default False)
428
    orignal : bool, optional
429
        Throughput to function households_per_size,
430
        A flag if the results should be left untouched and returned in
431
        original form for the year 2011 (True) or if they should be scaled to
432
        the given `year` by the population in that year (False).
433
434
    Returns
435
    -------
436
    pd.DataFrame or pd.Series
437
    """
438
    # source: survey of energieAgenturNRW
439
    # with/without direct water heating (DHW), and weighted average
440
    # https://1-stromvergleich.com/wp-content/uploads/erhebung_wo_bleibt_der_strom.pdf
441
    demand_per_hh_size = pd.DataFrame(
442
        index=range(1, 7),
443
        data={
444
            # "weighted DWH": [2290, 3202, 4193, 4955, 5928, 5928],
445
            # "without DHW": [1714, 2812, 3704, 4432, 5317, 5317],
446
            "with_DHW": [2181, 3843, 5151, 6189, 7494, 8465],
447
            "without_DHW": [1798, 2850, 3733, 4480, 5311, 5816],
448
            "weighted": [2256, 3248, 4246, 5009, 5969, 6579],
449
        },
450
    )
451
452
    if scenario == "eGon100RE":
453
        # chose demand per household size from survey without DHW
454
        power_per_HH = (
455
            demand_per_hh_size["without_DHW"] / 1e3
456
        )  # TODO why without?
457
458
        # calculate demand per nuts3 in 2011
459
        df_2011 = data.households_per_size(year=2011) * power_per_HH
460
461
        # scale demand per hh-size to meet demand without heat
462
        # according to JRC in 2011 (136.6-(20.14+9.41) TWh)
463
        # TODO check source and method
464
        power_per_HH *= (136.6 - (20.14 + 9.41)) * 1e6 / df_2011.sum().sum()
465
466
        # calculate demand per nuts3 in 2050
467
        df = data.households_per_size(year=year) * power_per_HH
468
469
    # Bottom-Up: Power demand by household sizes in [MWh/a] for each scenario
470
    elif scenario in ["status2019", "status2023", "eGon2021", "eGon2035"]:
471
        # chose demand per household size from survey including weighted DHW
472
        power_per_HH = demand_per_hh_size["weighted"] / 1e3
473
474
        # calculate demand per nuts3
475
        df = (
476
            data.households_per_size(original=original, year=year)
477
            * power_per_HH
478
        )
479
480
        if scenario == "eGon2035":
481
            # scale to fit demand of NEP 2021 scebario C 2035 (119TWh)
482
            df *= 119 * 1e6 / df.sum().sum()
483
484
        if scenario == "status2023":
485
            # scale to fit demand of BDEW 2023 (130.48 TWh) see issue #180
486
            df *= 130.48 * 1e6 / df.sum().sum()
487
488
        # if scenario == "status2021": # TODO status2021
489
        #     # scale to fit demand of AGEB 2021 (138.6 TWh)
490
        #     # https://ag-energiebilanzen.de/wp-content/uploads/2023/01/AGEB_22p2_rev-1.pdf#page=10
491
        #     df *= 138.6 * 1e6 / df.sum().sum()
492
493
    else:
494
        print(
495
            f"Electric demand per household size for scenario {scenario} "
496
            "is not specified."
497
        )
498
499
    if weight_by_income:
500
        df = spatial.adjust_by_income(df=df)
0 ignored issues
show
introduced by
The variable df does not seem to be defined for all execution paths.
Loading history...
501
502
    return df
503
504
def write_demandregio_hh_profiles_to_db(hh_profiles):
505
    """Write HH demand profiles from demand regio into db. One row per
506
    year and nuts3. The annual load profile timeseries is an array.
507
508
    schema: demand
509
    tablename: demandregio_household_load_profiles
510
511
512
513
    Parameters
514
    ----------
515
    hh_profiles: pd.DataFrame
516
517
    Returns
518
    -------
519
    """
520
    years = hh_profiles.index.year.unique().values
521
    df_to_db = pd.DataFrame(columns=["id", "year", "nuts3", "load_in_mwh"]).set_index("id")
522
    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]
523
524
    if dataset == "Schleswig-Holstein":
525
        hh_profiles = hh_profiles.loc[
526
            :, hh_profiles.columns.str.contains("DEF0")]
527
528
    id = 0
529
    for year in years:
530
        df = hh_profiles[hh_profiles.index.year == year]
531
        for nuts3 in hh_profiles.columns:
532
            id+=1
533
            df_to_db.at[id, "year"] = year
534
            df_to_db.at[id, "nuts3"] = nuts3
535
            df_to_db.at[id, "load_in_mwh"] = df[nuts3].to_list()
536
537
    df_to_db["year"] = df_to_db["year"].apply(int)
538
    df_to_db["nuts3"] = df_to_db["nuts3"].astype(str)
539
    df_to_db["load_in_mwh"] = df_to_db["load_in_mwh"].apply(list)
540
    df_to_db = df_to_db.reset_index()
541
542
    DemandRegioLoadProfiles.__table__.drop(bind=db.engine(), checkfirst=True)
543
    DemandRegioLoadProfiles.__table__.create(bind=db.engine())
544
545
    df_to_db.to_sql(
546
        name=DemandRegioLoadProfiles.__table__.name,
547
        schema=DemandRegioLoadProfiles.__table__.schema,
548
        con=db.engine(),
549
        if_exists="append",
550
        index=-False,
551
    )
552
553
    return
554
555
def insert_hh_demand(scenario, year, engine):
556
    """Calculates electrical demands of private households using demandregio's
557
    disaggregator and insert results into the database.
558
559
    Parameters
560
    ----------
561
    scenario : str
562
        Name of the corresponding scenario.
563
    year : int
564
        The number of households per region is taken from this year.
565
566
    Returns
567
    -------
568
    None.
569
570
    """
571
    targets = egon.data.config.datasets()["demandregio_household_demand"][
572
        "targets"
573
    ]["household_demand"]
574
    # get demands of private households per nuts and size from demandregio
575
    ec_hh = disagg_households_power(scenario, year)
576
577
    # Select demands for nuts3-regions in boundaries (needed for testmode)
578
    ec_hh = data_in_boundaries(ec_hh)
579
580
    # insert into database
581
    for hh_size in ec_hh.columns:
582
        df = pd.DataFrame(ec_hh[hh_size])
583
        df["year"] = 2023 if scenario == "status2023" else year # TODO status2023
584
        # adhoc fix until ffeopendata servers are up and population_year can be set
585
586
        df["scenario"] = scenario
587
        df["hh_size"] = hh_size
588
        df = df.rename({hh_size: "demand"}, axis="columns")
589
        df.to_sql(
590
            targets["table"],
591
            engine,
592
            schema=targets["schema"],
593
            if_exists="append",
594
        )
595
596
    # insert housholds demand timeseries
597
    try:
598
        hh_load_timeseries = (
599
            temporal.disagg_temporal_power_housholds_slp(
600
                use_nuts3code=True,
601
                by="households",
602
                weight_by_income=False,
603
                year=year,
604
            )
605
            .resample("h")
606
            .sum()
607
        )
608
        hh_load_timeseries.rename(
609
            columns={"DEB16": "DEB1C", "DEB19": "DEB1D"}, inplace=True)
610
    except Exception as e:
611
        logger.warning(f"Couldnt get profiles from FFE, will use pickeld fallback! \n {e}")
612
        hh_load_timeseries = pd.read_pickle(Path(".", "df_load_profiles.pkl").resolve())
613
614
        def change_year(dt, year):
615
            return dt.replace(year=year)
616
617
        year = 2023 if scenario == "status2023" else year  # TODO status2023
618
        hh_load_timeseries.index = hh_load_timeseries.index.map(lambda dt: change_year(dt, year))
0 ignored issues
show
introduced by
The variable change_year does not seem to be defined for all execution paths.
Loading history...
619
620
        if scenario == "status2023":
621
            hh_load_timeseries = hh_load_timeseries.shift(24 * 2)
622
623
            hh_load_timeseries.iloc[:24 * 7] = hh_load_timeseries.iloc[24 * 7:24 * 7 * 2].values
624
625
    write_demandregio_hh_profiles_to_db(hh_load_timeseries)
626
627
628
def insert_cts_ind(scenario, year, engine, target_values):
629
    """Calculates electrical demands of CTS and industry using demandregio's
630
    disaggregator, adjusts them according to resulting values of NEP 2021 or
631
    JRC IDEES and insert results into the database.
632
633
    Parameters
634
    ----------
635
    scenario : str
636
        Name of the corresponing scenario.
637
    year : int
638
        The number of households per region is taken from this year.
639
    target_values : dict
640
        List of target values for each scenario and sector.
641
642
    Returns
643
    -------
644
    None.
645
646
    """
647
648
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
649
        "targets"
650
    ]
651
652
    for sector in ["CTS", "industry"]:
653
        # get demands per nuts3 and wz of demandregio
654
        ec_cts_ind = spatial.disagg_CTS_industry(
655
            use_nuts3code=True, source="power", sector=sector, year=year
656
        ).transpose()
657
658
        ec_cts_ind.index = ec_cts_ind.index.rename("nuts3")
659
660
        # exclude mobility sector from GHD
661
        ec_cts_ind = ec_cts_ind.drop(columns=49, errors="ignore")
662
663
        # scale values according to target_values
664
        if sector in target_values[scenario].keys():
665
            ec_cts_ind *= (
666
                target_values[scenario][sector] / ec_cts_ind.sum().sum()
667
            )
668
669
        # include new largescale consumers according to NEP 2021
670
        if scenario == "eGon2035":
671
            ec_cts_ind = adjust_cts_ind_nep(ec_cts_ind, sector)
672
        # include new industrial demands due to sector coupling
673
        if (scenario == "eGon100RE") & (sector == "industry"):
674
            ec_cts_ind = adjust_ind_pes(ec_cts_ind)
675
676
        # Select demands for nuts3-regions in boundaries (needed for testmode)
677
        ec_cts_ind = data_in_boundaries(ec_cts_ind)
678
679
        # insert into database
680
        for wz in ec_cts_ind.columns:
681
            df = pd.DataFrame(ec_cts_ind[wz])
682
            df["year"] = year
683
            df["wz"] = wz
684
            df["scenario"] = scenario
685
            df = df.rename({wz: "demand"}, axis="columns")
686
            df.index = df.index.rename("nuts3")
687
            df.to_sql(
688
                targets["cts_ind_demand"]["table"],
689
                engine,
690
                targets["cts_ind_demand"]["schema"],
691
                if_exists="append",
692
            )
693
694
695
def insert_household_demand():
696
    """Insert electrical demands for households according to
697
    demandregio using its disaggregator-tool in MWh
698
699
    Returns
700
    -------
701
    None.
702
703
    """
704
    targets = egon.data.config.datasets()["demandregio_household_demand"][
705
        "targets"
706
    ]
707
    engine = db.engine()
708
709
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
710
711
    for t in targets:
712
        db.execute_sql(
713
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
714
        )
715
716
    for scn in scenarios:
717
        year = scenario_parameters.global_settings(scn)["population_year"]
718
719
        # Insert demands of private households
720
        insert_hh_demand(scn, year, engine)
721
722
723
def insert_cts_ind_demands():
724
    """Insert electricity demands per nuts3-region in Germany according to
725
    demandregio using its disaggregator-tool in MWh
726
727
    Returns
728
    -------
729
    None.
730
731
    """
732
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
733
        "targets"
734
    ]
735
    engine = db.engine()
736
737
    for t in targets:
738
        db.execute_sql(
739
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
740
        )
741
742
    insert_cts_ind_wz_definitions()
743
744
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
745
746
747
    for scn in scenarios:
748
        year = scenario_parameters.global_settings(scn)["population_year"]
749
750
        if year > 2035:
751
            year = 2035
752
753
        # target values per scenario in MWh
754
        target_values = {
755
            # according to NEP 2021
756
            # new consumers will be added seperatly
757
            "eGon2035": {
758
                "CTS": 135300 * 1e3,
759
                "industry": 225400 * 1e3
760
            },
761
            # CTS: reduce overall demand from demandregio (without traffic)
762
            # by share of heat according to JRC IDEES, data from 2011
763
            # industry: no specific heat demand, use data from demandregio
764
            "eGon100RE": {
765
                "CTS": ((1 - (5.96 + 6.13) / 154.64) * 125183.403) * 1e3
766
            },
767
            # no adjustments for status quo
768
            "eGon2021": {},
769
            "status2019": {},
770
            "status2023": {
771
                "CTS": 121160 * 1e3,
772
                "industry": 200380 * 1e3
773
            },
774
        }
775
776
        insert_cts_ind(scn, year, engine, target_values)
777
778
    # Insert load curves per wz
779
    timeseries_per_wz()
780
781
782
def insert_society_data():
783
    """Insert population and number of households per nuts3-region in Germany
784
    according to demandregio using its disaggregator-tool
785
786
    Returns
787
    -------
788
    None.
789
790
    """
791
    targets = egon.data.config.datasets()["demandregio_society"]["targets"]
792
    engine = db.engine()
793
794
    for t in targets:
795
        db.execute_sql(
796
            f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};"
797
        )
798
799
    target_years = np.append(
800
        get_sector_parameters("global").population_year.values, 2018
801
    )
802
803
    for year in target_years:
804
        df_pop = pd.DataFrame(data.population(year=year))
805
        df_pop["year"] = year
806
        df_pop = df_pop.rename({"value": "population"}, axis="columns")
807
        # Select data for nuts3-regions in boundaries (needed for testmode)
808
        df_pop = data_in_boundaries(df_pop)
809
        df_pop.to_sql(
810
            targets["population"]["table"],
811
            engine,
812
            schema=targets["population"]["schema"],
813
            if_exists="append",
814
        )
815
816
    for year in target_years:
817
        df_hh = pd.DataFrame(data.households_per_size(year=year))
818
        # Select data for nuts3-regions in boundaries (needed for testmode)
819
        df_hh = data_in_boundaries(df_hh)
820
        for hh_size in df_hh.columns:
821
            df = pd.DataFrame(df_hh[hh_size])
822
            df["year"] = year
823
            df["hh_size"] = hh_size
824
            df = df.rename({hh_size: "households"}, axis="columns")
825
            df.to_sql(
826
                targets["household"]["table"],
827
                engine,
828
                schema=targets["household"]["schema"],
829
                if_exists="append",
830
            )
831
832
833
def insert_timeseries_per_wz(sector, year):
834
    """Insert normalized electrical load time series for the selected sector
835
836
    Parameters
837
    ----------
838
    sector : str
839
        Name of the sector. ['CTS', 'industry']
840
    year : int
841
        Selected weather year
842
843
    Returns
844
    -------
845
    None.
846
847
    """
848
    targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][
849
        "targets"
850
    ]
851
852
    if sector == "CTS":
853
        profiles = (
854
            data.CTS_power_slp_generator("SH", year=year)
855
            .drop(
856
                [
857
                    "Day",
858
                    "Hour",
859
                    "DayOfYear",
860
                    "WD",
861
                    "SA",
862
                    "SU",
863
                    "WIZ",
864
                    "SOZ",
865
                    "UEZ",
866
                ],
867
                axis="columns",
868
            )
869
            .resample("H")
870
            .sum()
871
        )
872
        wz_slp = config.slp_branch_cts_power()
873
    elif sector == "industry":
874
        profiles = (
875
            data.shift_load_profile_generator(state="SH", year=year)
876
            .resample("H")
877
            .sum()
878
        )
879
        wz_slp = config.shift_profile_industry()
880
881
    else:
882
        print(f"Sector {sector} is not valid.")
883
884
    df = pd.DataFrame(
885
        index=wz_slp.keys(), columns=["slp", "load_curve", "year"]
0 ignored issues
show
introduced by
The variable wz_slp does not seem to be defined for all execution paths.
Loading history...
886
    )
887
888
    df.index.rename("wz", inplace=True)
889
890
    df.slp = wz_slp.values()
891
892
    df.year = year
893
894
    df.load_curve = profiles[df.slp].transpose().values.tolist()
0 ignored issues
show
introduced by
The variable profiles does not seem to be defined for all execution paths.
Loading history...
895
896
    db.execute_sql(
897
        f"""
898
                   DELETE FROM {targets['timeseries_cts_ind']['schema']}.
899
                   {targets['timeseries_cts_ind']['table']}
900
                   WHERE wz IN (
901
                       SELECT wz FROM {targets['wz_definitions']['schema']}.
902
                       {targets['wz_definitions']['table']}
903
                       WHERE sector = '{sector}')
904
                   """
905
    )
906
907
    df.to_sql(
908
        targets["timeseries_cts_ind"]["table"],
909
        schema=targets["timeseries_cts_ind"]["schema"],
910
        con=db.engine(),
911
        if_exists="append",
912
    )
913
914
915
def timeseries_per_wz():
916
    """Calcultae and insert normalized timeseries per wz for cts and industry
917
918
    Returns
919
    -------
920
    None.
921
922
    """
923
924
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]
925
926
    for scn in scenarios:
927
        year = int(scenario_parameters.global_settings(scn)["weather_year"])
928
929
        for sector in ["CTS", "industry"]:
930
            insert_timeseries_per_wz(sector, int(year))
931
932
933
def get_cached_tables():
934
    """Get cached demandregio tables and db-dump from former runs"""
935
    data_config = egon.data.config.datasets()
936
    for s in ["cache", "dbdump"]:
937
        url = data_config["demandregio_workaround"]["source"][s]["url"]
938
        target_path = data_config["demandregio_workaround"]["targets"][s]["path"]
939
        filename = os.path.basename(url)
940
        file_path = Path(".", target_path, filename).resolve()
941
        os.makedirs(file_path.parent, exist_ok=True)
942
        logger.info(f"Downloading: {filename} from {url}.")
943
        download_and_check(url, file_path, max_iteration=5)
944
        with zipfile.ZipFile(file_path, "r") as zip_ref:
945
            zip_ref.extractall(file_path.parent)
946
947