"""
CTS electricity and heat demand time series for scenarios in 2035 and 2050
assigned to OSM-buildings are generated.

Disaggregation of CTS heat & electricity demand time series from MV substation
to census cells via annual demand and then to OSM buildings via
amenity tags or randomly if no sufficient OSM data is available in the
respective census cell. If no OSM-buildings or synthetic residential buildings
are available, new synthetic 5x5m buildings are generated.
"""

from geoalchemy2 import Geometry
from geoalchemy2.shape import to_shape
from psycopg2.extensions import AsIs, register_adapter
from sqlalchemy import REAL, Column, Integer, String, cast, func
from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd
import numpy as np
import pandas as pd
import saio

from egon.data import config, db
from egon.data import logger as log
from egon.data.datasets import Dataset
from egon.data.datasets.electricity_demand import (
    EgonDemandRegioZensusElectricity,
)
from egon.data.datasets.electricity_demand.temporal import (
    EgonEtragoElectricityCts,
)
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
    BuildingElectricityPeakLoads,
    OsmBuildingsSynthetic,
)
from egon.data.datasets.electricity_demand_timeseries.mapping import (
    map_all_used_buildings,
)
from egon.data.datasets.electricity_demand_timeseries.tools import (
    random_ints_until_sum,
    random_point_in_square,
    specific_int_until_sum,
    write_table_to_postgis,
    write_table_to_postgres,
)
from egon.data.datasets.heat_demand import EgonPetaHeat
from egon.data.datasets.heat_demand_timeseries import EgonEtragoHeatCts
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa

engine = db.engine()
Base = declarative_base()

# import db tables
saio.register_schema("openstreetmap", engine=engine)
saio.register_schema("boundaries", engine=engine)


class EgonCtsElectricityDemandBuildingShare(Base):
    """
    Class definition of table demand.egon_cts_electricity_demand_building_share.

    Table including the MV substation electricity profile share of all selected
    CTS buildings for scenario eGon2035 and eGon100RE. This table is created
    within :func:`cts_electricity()`.
    """

    __tablename__ = "egon_cts_electricity_demand_building_share"
    __table_args__ = {"schema": "demand"}

    building_id = Column(Integer, primary_key=True)
    scenario = Column(String, primary_key=True)
    bus_id = Column(Integer, index=True)
    profile_share = Column(REAL)


class EgonCtsHeatDemandBuildingShare(Base):
    """
    Class definition of table demand.egon_cts_heat_demand_building_share.

    Table including the MV substation heat profile share of all selected
    CTS buildings for scenario eGon2035 and eGon100RE. This table is created
    within :func:`cts_heat()`.
    """

    __tablename__ = "egon_cts_heat_demand_building_share"
    __table_args__ = {"schema": "demand"}

    building_id = Column(Integer, primary_key=True)
    scenario = Column(String, primary_key=True)
    bus_id = Column(Integer, index=True)
    profile_share = Column(REAL)


class CtsBuildings(Base):
    """
    Class definition of table openstreetmap.egon_cts_buildings.

    Table of all selected CTS buildings with id, census cell id, geometry and
    amenity count in building. This table is created within
    :func:`cts_buildings()`.
    """

    __tablename__ = "egon_cts_buildings"
    __table_args__ = {"schema": "openstreetmap"}

    serial = Column(Integer, primary_key=True)
    id = Column(Integer, index=True)
    zensus_population_id = Column(Integer, index=True)
    geom_building = Column(Geometry("Polygon", 3035))
    n_amenities_inside = Column(Integer)
    source = Column(String)


class BuildingHeatPeakLoads(Base):
    """
    Class definition of table demand.egon_building_heat_peak_loads.

    """

    __tablename__ = "egon_building_heat_peak_loads"
    __table_args__ = {"schema": "demand"}

    building_id = Column(Integer, primary_key=True)
    scenario = Column(String, primary_key=True)
    sector = Column(String, primary_key=True)
    peak_load_in_w = Column(REAL)


class CtsDemandBuildings(Dataset):
    """
    Generates CTS electricity and heat demand time series for scenarios in 2035 and 2050
    assigned to OSM-buildings.

    Disaggregation of CTS heat & electricity demand time series from HV-MV substation
    to census cells via annual demand per census cell and then to OSM buildings via
    amenity tags or randomly if no sufficient OSM data is available in the
    respective census cell. If no OSM-buildings or synthetic residential buildings
    are available, new synthetic 5x5m buildings are generated.

    For more information see data documentation on :ref:`disagg-cts-elec-ref`.

    *Dependencies*
      * :py:class:`OsmBuildingsStreets <egon.data.datasets.osm_buildings_streets.OsmBuildingsStreets>`
      * :py:class:`CtsElectricityDemand <egon.data.datasets.electricity_demand.CtsElectricityDemand>`
      * :py:class:`hh_buildings <egon.data.datasets.electricity_demand_timeseries.hh_buildings>`
      * :py:class:`HeatTimeSeries <egon.data.datasets.heat_demand_timeseries.HeatTimeSeries>` (more specifically the :func:`export_etrago_cts_heat_profiles <egon.data.datasets.heat_demand_timeseries.export_etrago_cts_heat_profiles>` task)

    *Resulting tables*
      * :py:class:`openstreetmap.osm_buildings_synthetic <egon.data.datasets.electricity_demand_timeseries.hh_buildings.OsmBuildingsSynthetic>` is extended
      * :py:class:`openstreetmap.egon_cts_buildings <egon.data.datasets.electricity_demand_timeseries.cts_buildings.CtsBuildings>` is created
      * :py:class:`demand.egon_cts_electricity_demand_building_share <egon.data.datasets.electricity_demand_timeseries.cts_buildings.EgonCtsElectricityDemandBuildingShare>` is created
      * :py:class:`demand.egon_cts_heat_demand_building_share <egon.data.datasets.electricity_demand_timeseries.cts_buildings.EgonCtsHeatDemandBuildingShare>` is created
      * :py:class:`demand.egon_building_electricity_peak_loads <egon.data.datasets.electricity_demand_timeseries.hh_buildings.BuildingElectricityPeakLoads>` is extended
      * :py:class:`boundaries.egon_map_zensus_mvgd_buildings <egon.data.datasets.electricity_demand_timeseries.mapping.EgonMapZensusMvgdBuildings>` is extended.

    **The following datasets from the database are mainly used for creation:**

    * `openstreetmap.osm_buildings_filtered`:
       Table of OSM-buildings filtered by tags to select residential and cts
       buildings only.
    * `openstreetmap.osm_amenities_shops_filtered`:
       Table of OSM-amenities filtered by tags to select cts only.
    * `openstreetmap.osm_amenities_not_in_buildings_filtered`:
       Table of amenities which do not intersect with any building from
       `openstreetmap.osm_buildings_filtered`
    * `openstreetmap.osm_buildings_synthetic`:
       Table of synthetic residential buildings
    * `boundaries.egon_map_zensus_buildings_filtered_all`:
       Mapping table of census cells and buildings, filtered even if the
       population in a census cell = 0.
    * `demand.egon_demandregio_zensus_electricity`:
       Table of annual electricity load demand for residential and cts at census
       cell level. Residential load demand is derived from aggregated residential
       building profiles. DemandRegio CTS load demand at NUTS3 is distributed to
       census cells linearly to heat demand from peta5.
    * `demand.egon_peta_heat`:
       Table of annual heat load demand for residential and cts at census cell
       level from peta5.
    * `demand.egon_etrago_electricity_cts`:
       Scaled cts electricity time series for every MV substation. Derived from
       DemandRegio SLP for selected economic sectors at nuts3. Scaled with annual
       demand from `demand.egon_demandregio_zensus_electricity`
    * `demand.egon_etrago_heat_cts`:
       Scaled cts heat time series for every MV substation. Derived from
       DemandRegio SLP Gas for selected economic sectors at nuts3. Scaled with
       annual demand from `demand.egon_peta_heat`.

    **What is the challenge?**

    The OSM, DemandRegio and Peta5 datasets differ from each other. The OSM dataset
    is a community-based dataset which is continuously extended and does not claim
    to be complete. Therefore, not all census cells which have a demand assigned by
    the DemandRegio or Peta5 methodology also have buildings with respective tags,
    or sometimes even any building at all. Furthermore, the substation load areas
    are determined dynamically in a previous dataset. When merging these datasets,
    their different scopes (census cell shapes, building shapes) and their
    inconsistencies need to be addressed. For example: not yet tagged buildings or
    amenities in OSM, or building shapes exceeding census cells.

    **What are central assumptions during the data processing?**

    * We assume OSM data to be the most reliable and complete open source dataset.
    * We assume building and amenity tags to be truthful and accurate.
    * Mapping census to OSM data is not trivial. Discrepancies are substituted.
    * Missing OSM buildings are generated for each amenity.
    * Missing amenities are generated by median value of amenities/census cell.

    **Drawbacks and limitations of the data**

    * The shape of the profiles for each building is similar within an MVGD and
      only scaled with a different factor.
    * MVGDs are generated dynamically. In case of buildings with amenities
      exceeding MVGD borders, amenities which are assigned to a different MVGD than
      the building centroid are dropped for the sake of simplicity. One building
      should not have a connection to two MVGDs.
    * The completeness of the OSM data depends on community contribution and is
      crucial to the quality of our results.
    * Randomly selected buildings and generated amenities may inadequately reflect
      reality, but are chosen for the sake of simplicity as a measure to fill data
      gaps.
    * Since this dataset is a cascade after the generation of synthetic residential
      buildings, also check the drawbacks and limitations in hh_buildings.py.
    * Synthetic buildings may be placed within osm buildings which exceed multiple
      census cells. This is currently accepted but may be solved in #953.
    * Scattered high peak loads occur and might lead to single MV grid connections
      in ding0. In some cases this might not be viable. Postprocessing is needed and
      may be solved in #954.

    """

    #:
    name: str = "CtsDemandBuildings"
    #:
    version: str = "0.0.4"

    def __init__(self, dependencies):
        super().__init__(
            name=self.name,
            version=self.version,
            dependencies=dependencies,
            tasks=(
                cts_buildings,  # TODO: status2023, currently fixed for only 2023
                {cts_electricity, cts_heat},
                get_cts_electricity_peak_load,
                map_all_used_buildings,
                assign_voltage_level_to_buildings,
            ),
        )


def amenities_without_buildings():
    """
    Amenities which have no buildings assigned and are in a cell with cts
    demand are determined.

    Returns
    -------
    pd.DataFrame
        Table of amenities without buildings
    """
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered

    with db.session_scope() as session:
        scn_query = session.query(
            func.distinct(EgonDemandRegioZensusElectricity.scenario)
        )

    # FIXME: Cells with CTS demand, amenities and buildings do not change within the
    #  scenarios, only the demand itself. Therefore any scenario can be used
    #  universally to determine the cts buildings but not for the demand share.
    scn = db.select_dataframe(scn_query.statement).iat[0, 0]

    with db.session_scope() as session:
        cells_query = (
            session.query(
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
                osm_amenities_not_in_buildings_filtered.geom_amenity,
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
            )
            .filter(
                func.st_within(
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
                    DestatisZensusPopulationPerHa.geom,
                )
            )
            .filter(
                DestatisZensusPopulationPerHa.id
                == EgonDemandRegioZensusElectricity.zensus_population_id
            )
            .filter(
                EgonDemandRegioZensusElectricity.sector == "service",
                EgonDemandRegioZensusElectricity.scenario == scn,
            )
        )

    df_amenities_without_buildings = gpd.read_postgis(
        cells_query.statement,
        cells_query.session.bind,
        geom_col="geom_amenity",
    )
    return df_amenities_without_buildings


def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
    """
    Building centroids are placed randomly within census cells.
    The number of buildings is derived from n_amenity_inside, the selected
    method and the number of amenities per building.

    Returns
    -------
    df: gpd.GeoDataFrame
        Table of building centroids
    """
    if isinstance(max_amenities, int):
        # amount of amenities is randomly generated within bounds
        # (max_amenities, amenities per cell)
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
            random_ints_until_sum, args=[max_amenities]
        )
    if isinstance(amenities, int):
        # Specific amount of amenities per building
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
            specific_int_until_sum, args=[amenities]
        )

    # Unnest each building
    df = df.explode(column="n_amenities_inside")

    # building count per cell
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1

    # generate random synthetic buildings
    edge_length = 5
    # create random points within census cells
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
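    # Note (assumption based on the tol argument's intent): tol keeps each
    # random point at least half an edge length away from the cell border, so
    # that the 5x5 m building generated later stays inside the census cell.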

    df.reset_index(drop=True, inplace=True)
    # Store center of polygon
    df["geom_point"] = points
    # Drop geometry of census cell
    df = df.drop(columns=["geom"])

    return df


def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
    """
    Synthetic buildings are generated around points.

    Parameters
    ----------
    df: pd.DataFrame
        Table of census cells
    points: gpd.GeoSeries or str
        List of points to place buildings around or column name of df
    crs: str
        CRS of result table

    Returns
    -------
    df: gpd.GeoDataFrame
        Synthetic buildings
    """

    if isinstance(points, str) and points in df.columns:
        points = df[points]
    elif isinstance(points, gpd.GeoSeries):
        pass
    else:
        raise ValueError("Points are of the wrong type")

    # Create building using a square around point
    edge_length = 5
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)

    if "geom_point" not in df.columns:
        df["geom_point"] = df["geom_building"].centroid

    df = gpd.GeoDataFrame(
        df,
        crs=crs,
        geometry="geom_building",
    )

    # TODO remove after #772 implementation of egon_building_id
    df.rename(columns={"id": "egon_building_id"}, inplace=True)

    # get max number of building ids from synthetic table
    with db.session_scope() as session:
        max_synth_building_id = session.execute(
            func.max(cast(OsmBuildingsSynthetic.id, Integer))
        ).scalar()
    max_synth_building_id = int(max_synth_building_id)

    # create sequential ids
    df["egon_building_id"] = range(
        max_synth_building_id + 1,
        max_synth_building_id + df.shape[0] + 1,
    )

    df["area"] = df["geom_building"].area
    # set building type of synthetic building
    df["building"] = "cts"
    # TODO remove after #772
    df = df.rename(
        columns={
            # "zensus_population_id": "cell_id",
            "egon_building_id": "id",
        }
    )
    return df


def buildings_with_amenities():
    """
    Amenities which are assigned to buildings are determined and grouped per
    building and zensus cell. Buildings covering multiple cells therefore
    exist multiple times but in different zensus cells. This is necessary to
    cover as many cells with a cts demand as possible. If buildings exist in
    multiple mvgds (bus_id), only the amenities within the same mvgd as the
    building centroid are kept. If, as a result, a census cell is not covered
    by any building, a synthetic amenity is placed. The buildings are
    aggregated afterwards during the calculation of the profile_share.

    Returns
    -------
    df_buildings_with_amenities: gpd.GeoDataFrame
        Contains all buildings with amenities per zensus cell.
    df_lost_cells: gpd.GeoDataFrame
        Contains synthetic amenities in lost cells. Might be empty.
    """

    from saio.boundaries import egon_map_zensus_buildings_filtered_all
    from saio.openstreetmap import osm_amenities_in_buildings_filtered

    for scn in config.settings()["egon-data"]["--scenarios"]:
        with db.session_scope() as session:
            cells_query = (
                session.query(
                    osm_amenities_in_buildings_filtered,
                    MapZensusGridDistricts.bus_id,
                )
                .filter(
                    MapZensusGridDistricts.zensus_population_id
                    == osm_amenities_in_buildings_filtered.zensus_population_id
                )
                .filter(
                    EgonDemandRegioZensusElectricity.zensus_population_id
                    == osm_amenities_in_buildings_filtered.zensus_population_id
                )
                .filter(
                    EgonDemandRegioZensusElectricity.sector == "service",
                    EgonDemandRegioZensusElectricity.scenario == scn,
                )
            )
            df_amenities_in_buildings = pd.read_sql(
                cells_query.statement, con=session.connection(), index_col=None
            )
        if df_amenities_in_buildings.empty:
            break

    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
        "geom_building"
    ].apply(to_shape)
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
        "geom_amenity"
    ].apply(to_shape)

    # retrieve building centroid bus_id
    with db.session_scope() as session:
        cells_query = session.query(
            egon_map_zensus_buildings_filtered_all.id,
            MapZensusGridDistricts.bus_id.label("building_bus_id"),
        ).filter(
            egon_map_zensus_buildings_filtered_all.zensus_population_id
            == MapZensusGridDistricts.zensus_population_id
        )

        df_building_bus_id = pd.read_sql(
            cells_query.statement, con=session.connection(), index_col=None
        )

    df_amenities_in_buildings = pd.merge(
        left=df_amenities_in_buildings, right=df_building_bus_id, on="id"
    )

    # identify amenities with differing bus_id as building
    identified_amenities = df_amenities_in_buildings.loc[
        df_amenities_in_buildings["bus_id"]
        != df_amenities_in_buildings["building_bus_id"]
    ].index

    lost_cells = df_amenities_in_buildings.loc[
        identified_amenities, "zensus_population_id"
    ].unique()
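    # Census cells that only contained such dropped amenities would otherwise
    # lose their CTS demand carrier; they are re-covered further below by
    # placing synthetic amenities in these "lost cells".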

    # check if lost zensus cells are already covered
    if not (
        df_amenities_in_buildings["zensus_population_id"]
        .isin(lost_cells)
        .empty
    ):
        # query geom data for cell if not
        with db.session_scope() as session:
            cells_query = session.query(
                DestatisZensusPopulationPerHa.id,
                DestatisZensusPopulationPerHa.geom,
            ).filter(
                DestatisZensusPopulationPerHa.id.in_(pd.Index(lost_cells))
            )

            df_lost_cells = gpd.read_postgis(
                cells_query.statement,
                cells_query.session.bind,
                geom_col="geom",
            )

        # place random amenity in cell
        df_lost_cells["n_amenities_inside"] = 1
        df_lost_cells.rename(
            columns={
                "id": "zensus_population_id",
            },
            inplace=True,
        )
        df_lost_cells = place_buildings_with_amenities(
            df_lost_cells, amenities=1
        )
        df_lost_cells.rename(
            columns={
                # "id": "zensus_population_id",
                "geom_point": "geom_amenity",
            },
            inplace=True,
        )
        df_lost_cells.drop(
            columns=["building_count", "n_amenities_inside"], inplace=True
        )
    else:
        df_lost_cells = None

    df_amenities_in_buildings.drop(identified_amenities, inplace=True)
    df_amenities_in_buildings.drop(columns="building_bus_id", inplace=True)

    df_amenities_in_buildings["n_amenities_inside"] = 1

    # sum amenities per building and cell
    df_amenities_in_buildings["n_amenities_inside"] = (
        df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
            "n_amenities_inside"
        ].transform("sum")
    )
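    # n_amenities_inside now holds the amenity count per (building, cell)
    # pair; duplicate rows per pair are dropped next.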
    # drop duplicated buildings
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
        ["id", "zensus_population_id"]
    )
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)

    df_buildings_with_amenities = df_buildings_with_amenities[
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
    ]
    df_buildings_with_amenities.rename(
        columns={
            # "zensus_population_id": "cell_id",
            "egon_building_id": "id"
        },
        inplace=True,
    )

    return df_buildings_with_amenities, df_lost_cells


def buildings_without_amenities():
    """
    Buildings (filtered and synthetic) in cells with
    cts demand but no amenities are determined.

    Returns
    -------
    df_buildings_without_amenities: gpd.GeoDataFrame
        Table of buildings without amenities in zensus cells
        with cts demand.
    """
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
    from saio.openstreetmap import (
        osm_amenities_shops_filtered,
        osm_buildings_filtered,
        osm_buildings_synthetic,
    )

    with db.session_scope() as session:
        scn_query = session.query(
            func.distinct(EgonDemandRegioZensusElectricity.scenario)
        )

    # FIXME: Cells with CTS demand, amenities and buildings do not change within the
    #  scenarios, only the demand itself. Therefore any scenario can be used
    #  universally to determine the cts buildings but not for the demand share.
    scn = db.select_dataframe(scn_query.statement).iat[0, 0]

    # buildings_filtered in cts-demand-cells without amenities
    with db.session_scope() as session:
        # Synthetic Buildings
        q_synth_buildings = session.query(
            osm_buildings_synthetic.cell_id.cast(Integer).label(
                "zensus_population_id"
            ),
            osm_buildings_synthetic.id.cast(Integer).label("id"),
            osm_buildings_synthetic.area.label("area"),
            osm_buildings_synthetic.geom_building.label("geom_building"),
            osm_buildings_synthetic.geom_point.label("geom_point"),
        )

        # Buildings filtered
        q_buildings_filtered = session.query(
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
            osm_buildings_filtered.id,
            osm_buildings_filtered.area,
            osm_buildings_filtered.geom_building,
            osm_buildings_filtered.geom_point,
        ).filter(
            osm_buildings_filtered.id
            == egon_map_zensus_buildings_filtered_all.id
        )

        # Amenities + zensus_population_id
        q_amenities = (
            session.query(
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
            )
            .filter(
                func.st_within(
                    osm_amenities_shops_filtered.geom_amenity,
                    DestatisZensusPopulationPerHa.geom,
                )
            )
            .distinct(DestatisZensusPopulationPerHa.id)
        )

        # Cells with CTS demand but without amenities
        q_cts_without_amenities = (
            session.query(
                EgonDemandRegioZensusElectricity.zensus_population_id,
            )
            .filter(
                EgonDemandRegioZensusElectricity.sector == "service",
                EgonDemandRegioZensusElectricity.scenario == scn,
            )
            .filter(
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
                    q_amenities
                )
            )
            .distinct()
        )

        # Buildings filtered + synthetic buildings residential in
        # cells with CTS demand but without amenities
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
                q_cts_without_amenities
            )
        )

    # df_buildings_without_amenities = pd.read_sql(
    #     cells_query.statement, cells_query.session.bind, index_col=None)
    df_buildings_without_amenities = gpd.read_postgis(
        cells_query.statement,
        cells_query.session.bind,
        geom_col="geom_building",
    )

    df_buildings_without_amenities = df_buildings_without_amenities.rename(
        columns={
            # "zensus_population_id": "cell_id",
            "egon_building_id": "id",
        }
    )

    return df_buildings_without_amenities


def select_cts_buildings(df_buildings_wo_amenities, max_n):
    """
    N buildings (filtered and synthetic) in each cell with
    cts demand are selected. Only the first n buildings
    are taken for each cell. The buildings are sorted by surface
    area.

    Returns
    -------
    df_buildings_with_cts_demand: gpd.GeoDataFrame
        Table of buildings
    """

    df_buildings_wo_amenities.sort_values(
        "area", ascending=False, inplace=True
    )
    # select first n ids each census cell if available
    df_buildings_with_cts_demand = (
        df_buildings_wo_amenities.groupby("zensus_population_id")
        .nth(list(range(max_n)))
        .reset_index()
    )
    df_buildings_with_cts_demand.reset_index(drop=True, inplace=True)

    return df_buildings_with_cts_demand


def cells_with_cts_demand_only(df_buildings_without_amenities):
    """
    Cells with cts demand but no amenities or buildings
    are determined.

    Returns
    -------
    df_cells_only_cts_demand: gpd.GeoDataFrame
        Table of cells with cts demand but no amenities or buildings
    """
    from saio.openstreetmap import osm_amenities_shops_filtered

    with db.session_scope() as session:
        scn_query = session.query(
            func.distinct(EgonDemandRegioZensusElectricity.scenario)
        )

    # FIXME: Cells with CTS demand, amenities and buildings do not change within the
    #  scenarios, only the demand itself. Therefore any scenario can be used
    #  universally to determine the cts buildings but not for the demand share.
    scn = db.select_dataframe(scn_query.statement).iat[0, 0]

    # cells with amenities
    with db.session_scope() as session:
        sub_query = (
            session.query(
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
            )
            .filter(
                func.st_within(
                    osm_amenities_shops_filtered.geom_amenity,
                    DestatisZensusPopulationPerHa.geom,
                )
            )
            .distinct(DestatisZensusPopulationPerHa.id)
        )

        cells_query = (
            session.query(
                EgonDemandRegioZensusElectricity.zensus_population_id,
                EgonDemandRegioZensusElectricity.scenario,
                EgonDemandRegioZensusElectricity.sector,
                EgonDemandRegioZensusElectricity.demand,
                DestatisZensusPopulationPerHa.geom,
            )
            .filter(
                EgonDemandRegioZensusElectricity.sector == "service",
                EgonDemandRegioZensusElectricity.scenario == scn,
            )
            .filter(
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
                    sub_query
                )
            )
            .filter(
                EgonDemandRegioZensusElectricity.zensus_population_id
                == DestatisZensusPopulationPerHa.id
            )
        )

    df_cts_cell_without_amenities = gpd.read_postgis(
        cells_query.statement,
        cells_query.session.bind,
        geom_col="geom",
        index_col=None,
    )

    # TODO remove after #722
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
        columns={"cell_id": "zensus_population_id"}
    )

    # Census cells with only cts demand
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
            df_buildings_without_amenities["zensus_population_id"].unique()
        )
    ]

    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)

    return df_cells_only_cts_demand


def calc_census_cell_share(scenario, sector):
    """
    The profile share for each census cell is calculated by its
    share of annual demand per substation bus. The annual demand
    per cell is defined by DemandRegio/Peta5. The share is identical
    for both scenarios as the annual demand is linearly scaled.

    Parameters
    ----------
    scenario: str
        Scenario for which the share is calculated: "eGon2035" or "eGon100RE"
    sector: str
        Sector for which the share is calculated: "electricity" or "heat"

    Returns
    -------
    df_census_share: pd.DataFrame
    """
    if sector == "electricity":
        with db.session_scope() as session:
            cells_query = (
                session.query(
                    EgonDemandRegioZensusElectricity,
                    MapZensusGridDistricts.bus_id,
                )
                .filter(EgonDemandRegioZensusElectricity.sector == "service")
                .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
                .filter(
                    EgonDemandRegioZensusElectricity.zensus_population_id
                    == MapZensusGridDistricts.zensus_population_id
                )
            )

    elif sector == "heat":
        with db.session_scope() as session:
            cells_query = (
                session.query(EgonPetaHeat, MapZensusGridDistricts.bus_id)
                .filter(EgonPetaHeat.sector == "service")
                .filter(EgonPetaHeat.scenario == scenario)
                .filter(
                    EgonPetaHeat.zensus_population_id
                    == MapZensusGridDistricts.zensus_population_id
                )
            )

    df_demand = pd.read_sql(
        cells_query.statement,
        cells_query.session.bind,
        index_col="zensus_population_id",
    )

    # get demand share of cell per bus
    df_census_share = df_demand["demand"] / df_demand.groupby("bus_id")[
        "demand"
    ].transform("sum")
    df_census_share = df_census_share.rename("cell_share")
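    # Illustrative example (hypothetical numbers): two cells on the same bus
    # with annual demands of 10 and 30 MWh get cell shares of 0.25 and 0.75,
    # i.e. the demand of a cell divided by the summed demand of its bus_id.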

    df_census_share = pd.concat(
        [
            df_census_share,
            df_demand[["bus_id", "scenario"]],
        ],
        axis=1,
    )

    df_census_share.reset_index(inplace=True)
    return df_census_share


def calc_building_demand_profile_share(
    df_cts_buildings, scenario="eGon2035", sector="electricity"
):
    """
    Share of cts electricity demand profile per bus for every selected building
    is calculated. The building-amenity share is multiplied with the census cell
    share to get the substation bus profile share for each building. The share
    is grouped and aggregated per building as some buildings exceed the shape of
    census cells and have amenities assigned from multiple cells. Buildings
    therefore get the amenity share of all their census cells.

    Parameters
    ----------
    df_cts_buildings: gpd.GeoDataFrame
        Table of all buildings with cts demand assigned
    scenario: str
        Scenario for which the share is calculated.
    sector: str
        Sector for which the share is calculated.

    Returns
    -------
    df_building_share: pd.DataFrame
        Table of bus profile share per building

    """

    def calc_building_amenity_share(df_cts_buildings):
        """
        Calculate the building share by the number of amenities per building
        within a census cell. Building ids can exist multiple times but with
        different zensus_population_ids.
        """
        df_building_amenity_share = df_cts_buildings[
            "n_amenities_inside"
        ] / df_cts_buildings.groupby("zensus_population_id")[
            "n_amenities_inside"
        ].transform(
            "sum"
        )
        df_building_amenity_share = pd.concat(
            [
                df_building_amenity_share.rename("building_amenity_share"),
                df_cts_buildings[["zensus_population_id", "id"]],
            ],
            axis=1,
        )
        return df_building_amenity_share

    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)

    df_census_cell_share = calc_census_cell_share(
        scenario=scenario, sector=sector
    )

    df_demand_share = pd.merge(
        left=df_building_amenity_share,
        right=df_census_cell_share,
        left_on="zensus_population_id",
        right_on="zensus_population_id",
    )
    df_demand_share["profile_share"] = df_demand_share[
        "building_amenity_share"
    ].multiply(df_demand_share["cell_share"])
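    # Example (assumed values): a building holding 2 of 4 amenities in its
    # cell (building_amenity_share = 0.5) within a cell that carries 10 % of
    # its bus demand (cell_share = 0.1) gets profile_share = 0.05.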

    # only pass selected columns
    df_demand_share = df_demand_share[
        ["id", "bus_id", "scenario", "profile_share"]
    ]

    # Group and aggregate per building for multi cell buildings
    df_demand_share = (
        df_demand_share.groupby(["scenario", "id", "bus_id"])
        .sum()
        .reset_index()
    )
    if df_demand_share.duplicated("id", keep=False).any():
        print(
            df_demand_share.loc[df_demand_share.duplicated("id", keep=False)]
        )
    return df_demand_share


def get_peta_demand(mvgd, scenario):
    """
    Retrieve annual peta heat demand for CTS for either
    eGon2035 or eGon100RE scenario.

    Parameters
    ----------
    mvgd : int
        ID of substation for which to get CTS demand.
    scenario : str
        Possible options are eGon2035 or eGon100RE

    Returns
    -------
    df_peta_demand : pd.DataFrame
        Annual CTS heat demand per census cell within the MV grid district
        for the given scenario. Columns of the dataframe are
        zensus_population_id and demand.

    """

    with db.session_scope() as session:
        query = (
            session.query(
                MapZensusGridDistricts.zensus_population_id,
                EgonPetaHeat.demand,
            )
            .filter(MapZensusGridDistricts.bus_id == int(mvgd))
            .filter(
                MapZensusGridDistricts.zensus_population_id
                == EgonPetaHeat.zensus_population_id
            )
            .filter(
                EgonPetaHeat.sector == "service",
                EgonPetaHeat.scenario == scenario,
            )
        )

        df_peta_demand = pd.read_sql(
            query.statement, query.session.bind, index_col=None
        )

    return df_peta_demand


def calc_cts_building_profiles(
    bus_ids,
    scenario,
    sector,
):
    """
    Calculate the cts demand profile for each building. The profile is
    calculated by the demand share of the building per substation bus.

    Parameters
    ----------
    bus_ids: list of int
        Ids of the substation for which selected building profiles are
        calculated.
    scenario: str
        Scenario for which the share is calculated: "eGon2035" or "eGon100RE"
    sector: str
        Sector for which the share is calculated: "electricity" or "heat"

    Returns
    -------
    df_building_profiles: pd.DataFrame
        Table of demand profile per building. Column names are building IDs
        and index is hour of the year as int (0-8759).

    """
    if sector == "electricity":
        # Get cts building electricity demand share of selected buildings
        with db.session_scope() as session:
            cells_query = (
                session.query(
                    EgonCtsElectricityDemandBuildingShare,
                )
                .filter(
                    EgonCtsElectricityDemandBuildingShare.scenario == scenario
                )
                .filter(
                    EgonCtsElectricityDemandBuildingShare.bus_id.in_(bus_ids)
                )
            )

        df_demand_share = pd.read_sql(
            cells_query.statement, cells_query.session.bind, index_col=None
        )

        # Get substation cts electricity load profiles of selected bus_ids
        with db.session_scope() as session:
            cells_query = (
                session.query(EgonEtragoElectricityCts).filter(
                    EgonEtragoElectricityCts.scn_name == scenario
                )
            ).filter(EgonEtragoElectricityCts.bus_id.in_(bus_ids))

        df_cts_substation_profiles = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )
        df_cts_substation_profiles = pd.DataFrame.from_dict(
            df_cts_substation_profiles.set_index("bus_id")["p_set"].to_dict(),
            orient="index",
        )
        # df_cts_profiles = calc_load_curves_cts(scenario)

    elif sector == "heat":
        # Get cts building heat demand share of selected buildings
        with db.session_scope() as session:
            cells_query = (
                session.query(
                    EgonCtsHeatDemandBuildingShare,
                )
                .filter(EgonCtsHeatDemandBuildingShare.scenario == scenario)
                .filter(EgonCtsHeatDemandBuildingShare.bus_id.in_(bus_ids))
            )

        df_demand_share = pd.read_sql(
            cells_query.statement, cells_query.session.bind, index_col=None
        )

        # Get substation cts heat load profiles of selected bus_ids
        # (this profile only contains zensus cells with individual heating;
        #  in order to obtain a profile for the whole MV grid it is afterwards
        #  scaled by the grid's total CTS demand from peta)
        with db.session_scope() as session:
            cells_query = (
                session.query(EgonEtragoHeatCts).filter(
                    EgonEtragoHeatCts.scn_name == scenario
                )
            ).filter(EgonEtragoHeatCts.bus_id.in_(bus_ids))

        df_cts_substation_profiles = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )
        df_cts_substation_profiles = pd.DataFrame.from_dict(
            df_cts_substation_profiles.set_index("bus_id")["p_set"].to_dict(),
            orient="index",
        )
        for bus_id in bus_ids:
            if bus_id in df_cts_substation_profiles.index:
                # get peta demand to scale load profile to
                peta_cts_demand = get_peta_demand(bus_id, scenario)
                scaling_factor = (
                    peta_cts_demand.demand.sum()
                    / df_cts_substation_profiles.loc[bus_id, :].sum()
                )
                # scale load profile
                df_cts_substation_profiles.loc[bus_id, :] *= scaling_factor
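                # After scaling, each substation profile integrates to the
                # bus's annual CTS heat demand from peta5 instead of only the
                # demand of cells with individual heating.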

    else:
        raise KeyError("Sector needs to be either 'electricity' or 'heat'")

    # TODO remove after #722
    df_demand_share.rename(columns={"id": "building_id"}, inplace=True)

    # get demand profile for all buildings for selected demand share
    df_building_profiles = pd.DataFrame()
    for bus_id, df in df_demand_share.groupby("bus_id"):
        shares = df.set_index("building_id", drop=True)["profile_share"]
        try:
            profile_ts = df_cts_substation_profiles.loc[bus_id]
        except KeyError:
            # This should only happen within the SH cutout
            log.info(
                f"No CTS profile found for substation with bus_id:"
                f" {bus_id}"
            )
            continue

        building_profiles = np.outer(profile_ts, shares)
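        # np.outer produces an (hours x buildings) array: each column is the
        # substation profile scaled by that building's profile_share.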
        building_profiles = pd.DataFrame(
            building_profiles, index=profile_ts.index, columns=shares.index
        )
        df_building_profiles = pd.concat(
            [df_building_profiles, building_profiles], axis=1
        )

    return df_building_profiles


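# Illustrative call of calc_cts_building_profiles (the bus ids are made up and
# only sketch the expected usage; they are not defined in this module):
# df_profiles = calc_cts_building_profiles(
#     bus_ids=[1, 2], scenario="eGon2035", sector="electricity"
# )

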
def delete_synthetic_cts_buildings():
    """
    All synthetic cts buildings are deleted from the DB. This is necessary if
    the task is run multiple times as the existing synthetic buildings
    influence the results.
    """
    # import db tables
    from saio.openstreetmap import osm_buildings_synthetic

    # delete all synthetic buildings tagged as cts
    with db.session_scope() as session:
        session.query(osm_buildings_synthetic).filter(
            osm_buildings_synthetic.building == "cts"
        ).delete()


def remove_double_bus_id(df_cts_buildings):
    """This is a backup ad hoc fix if there should still be a building which
    is assigned to 2 substations. In this case one of the buildings is just
    dropped. As this currently accounts for only one building with one amenity
    the deviation is negligible."""
    # assign bus_id via census cell of amenity
    with db.session_scope() as session:
        cells_query = session.query(
            MapZensusGridDistricts.zensus_population_id,
            MapZensusGridDistricts.bus_id,
        )

    df_egon_map_zensus_buildings_buses = pd.read_sql(
        cells_query.statement,
        cells_query.session.bind,
        index_col=None,
    )
    df_cts_buildings = pd.merge(
        left=df_cts_buildings,
        right=df_egon_map_zensus_buildings_buses,
        on="zensus_population_id",
    )

    substation_per_building = df_cts_buildings.groupby("id")[
        "bus_id"
    ].nunique()
    building_id = substation_per_building.loc[
        substation_per_building > 1
    ].index
    df_duplicates = df_cts_buildings.loc[
        df_cts_buildings["id"].isin(building_id)
    ]
    for unique_id in df_duplicates["id"].unique():
        drop_index = df_duplicates[df_duplicates["id"] == unique_id].index[0]
        print(
            f"Buildings {df_cts_buildings.loc[drop_index, 'id']}"
            f" dropped because of double substation"
        )
        df_cts_buildings.drop(index=drop_index, inplace=True)

    df_cts_buildings.drop(columns="bus_id", inplace=True)

    return df_cts_buildings


def cts_buildings():
    """
    Assigns CTS demand to buildings and calculates the respective demand
    profiles. The demand profiles per substation are disaggregated via the
    annual demand share of each census cell and the number of amenities
    per building within the cell. If no building data is available,
    synthetic buildings are generated around the amenities. If no amenities
    but cts demand is available, buildings are randomly selected. If neither
    a building nor an amenity is available, random synthetic buildings are
    generated. The demand share is stored in the database.

    Note
    -----
    Cells with CTS demand, amenities and buildings do not change within
    the scenarios, only the demand itself. Therefore any scenario
    can be used universally to determine the cts buildings but not for
    the demand share.
    """

    # ========== Register np datatypes with SQLA ==========
    def adapt_numpy_float64(numpy_float64):
        return AsIs(numpy_float64)

    def adapt_numpy_int64(numpy_int64):
        return AsIs(numpy_int64)

    register_adapter(np.float64, adapt_numpy_float64)
    register_adapter(np.int64, adapt_numpy_int64)
    # =====================================================

    log.info("Start logging!")
    # Buildings with amenities
    df_buildings_with_amenities, df_lost_cells = (
        buildings_with_amenities()
    )  # TODO: status2023 this is fixed to 2023
    log.info("Buildings with amenities selected!")

    # Median number of amenities per cell
    median_n_amenities = int(
        df_buildings_with_amenities.groupby("zensus_population_id")[
            "n_amenities_inside"
        ]
        .sum()
        .median()
    )
    log.info(f"Median amenity value: {median_n_amenities}")

    # Remove synthetic CTS buildings if existing
    delete_synthetic_cts_buildings()
    log.info("Old synthetic cts buildings deleted!")

    # Amenities not assigned to buildings
    df_amenities_without_buildings = amenities_without_buildings()
    log.info("Amenities without buildings selected!")

    # Append lost cells due to duplicated ids, to cover all demand cells
    if not df_lost_cells.empty:
        # Number of synth amenities per cell
        df_lost_cells["amenities"] = median_n_amenities
        # create row for every amenity
        df_lost_cells["amenities"] = (
            df_lost_cells["amenities"].astype(int).apply(range)
        )
        df_lost_cells = df_lost_cells.explode("amenities")
        df_lost_cells.drop(columns="amenities", inplace=True)
        df_amenities_without_buildings = pd.concat(
            [df_amenities_without_buildings, df_lost_cells], ignore_index=True
        )
        log.info(
            f"{df_lost_cells.shape[0]} lost cells due to substation "
            f"intersection appended!"
        )

    # One building per amenity
    df_amenities_without_buildings["n_amenities_inside"] = 1
    # Create synthetic buildings for amenities without buildings
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
        df_amenities_without_buildings, points="geom_amenity"
    )
    log.info("Synthetic buildings created!")

    # TODO remove renaming after #722
    write_table_to_postgis(
        df_synthetic_buildings_with_amenities.rename(
            columns={
                "zensus_population_id": "cell_id",
                "egon_building_id": "id",
            }
        ),
        OsmBuildingsSynthetic,
        engine=engine,
        drop=False,
    )
    log.info("Synthetic buildings exported to DB!")

    # Cells without amenities but CTS demand and buildings
    df_buildings_without_amenities = buildings_without_amenities()
    log.info("Buildings without amenities in demand cells identified!")

    # Backup bugfix for duplicated buildings which occur in the SQL query:
    # drop building ids which have already been used
    mask = df_buildings_without_amenities.loc[
        df_buildings_without_amenities["id"].isin(
            df_buildings_with_amenities["id"]
        )
    ].index
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
        index=mask
    ).reset_index(drop=True)
    log.info(f"{len(mask)} duplicated ids removed!")

    # select median n buildings per cell
    df_buildings_without_amenities = select_cts_buildings(
        df_buildings_without_amenities, max_n=median_n_amenities
    )
    df_buildings_without_amenities["n_amenities_inside"] = 1
    log.info(f"{median_n_amenities} buildings per cell selected!")

    # Create synthetic amenities and buildings in cells with only CTS demand
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
        df_buildings_without_amenities
    )
    log.info("Cells with only demand identified!")

    # TODO implement overlay prevention #953 here
    # Median n Amenities per cell
    df_cells_with_cts_demand_only["amenities"] = median_n_amenities
    # create row for every amenity
    df_cells_with_cts_demand_only["amenities"] = (
        df_cells_with_cts_demand_only["amenities"].astype(int).apply(range)
    )
    df_cells_with_cts_demand_only = df_cells_with_cts_demand_only.explode(
        "amenities"
    )
    df_cells_with_cts_demand_only.drop(columns="amenities", inplace=True)

    # Only 1 Amenity per Building
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
        df_cells_with_cts_demand_only, amenities=1
    )
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
        df_cells_with_cts_demand_only, points="geom_point"
    )
    log.info(f"{median_n_amenities} synthetic buildings per cell created")

    # TODO remove renaming after #722
    write_table_to_postgis(
        df_synthetic_buildings_without_amenities.rename(
            columns={
                "zensus_population_id": "cell_id",
                "egon_building_id": "id",
            }
        ),
        OsmBuildingsSynthetic,
        engine=engine,
        drop=False,
    )
    log.info("Synthetic buildings exported to DB")

    # Concat all buildings
    columns = [
        "zensus_population_id",
        "id",
        "geom_building",
        "n_amenities_inside",
        "source",
    ]

    df_buildings_with_amenities["source"] = "bwa"
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
    df_buildings_without_amenities["source"] = "bwoa"
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
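    # source codes (derived from the variable names above): bwa = buildings
    # with amenities, sbwa = synthetic buildings with amenities, bwoa =
    # buildings without amenities, sbwoa = synthetic buildings without
    # amenities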

    df_cts_buildings = pd.concat(
        [
            df_buildings_with_amenities[columns],
            df_synthetic_buildings_with_amenities[columns],
            df_buildings_without_amenities[columns],
            df_synthetic_buildings_without_amenities[columns],
        ],
        axis=0,
        ignore_index=True,
    )
    df_cts_buildings = remove_double_bus_id(df_cts_buildings)
    log.info("Double bus_id checked")

    # TODO remove dtype correction after #722
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)

    df_cts_buildings = gpd.GeoDataFrame(
        df_cts_buildings, geometry="geom_building", crs=3035
    )
    df_cts_buildings = df_cts_buildings.reset_index().rename(
        columns={"index": "serial"}
    )

    # Write table to db for debugging and postprocessing
    write_table_to_postgis(
        df_cts_buildings,
        CtsBuildings,
        engine=engine,
        drop=True,
    )
    log.info("CTS buildings exported to DB!")


def cts_electricity():
    """
    Calculate the CTS electricity demand share of the HV/MV substation
    profile for each selected CTS building and each configured scenario,
    and write the result to the database.
    """
    log.info("Start logging!")
    with db.session_scope() as session:
        cells_query = session.query(CtsBuildings)

    df_cts_buildings = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col=None
    )
    log.info("CTS buildings from DB imported!")

    df_demand_share = pd.DataFrame()

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        df_demand_share_per_scenario = calc_building_demand_profile_share(
            df_cts_buildings, scenario=scenario, sector="electricity"
        )
        log.info(f"Profile share for {scenario} calculated!")

        df_demand_share = pd.concat(
            [df_demand_share, df_demand_share_per_scenario],
            axis=0,
            ignore_index=True,
        )

    df_demand_share.rename(columns={"id": "building_id"}, inplace=True)

    write_table_to_postgres(
        df_demand_share,
        EgonCtsElectricityDemandBuildingShare,
        drop=True,
    )
    log.info("Profile share exported to DB!")


def cts_heat():
    """
    Calculate the CTS heat demand share of the HV/MV substation profile
    for each selected CTS building and each configured scenario, and write
    the result to the database.
    """
    log.info("Start logging!")
    with db.session_scope() as session:
        cells_query = session.query(CtsBuildings)

    df_cts_buildings = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col=None
    )
    log.info("CTS buildings from DB imported!")

    df_demand_share = pd.DataFrame()

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        df_demand_share_per_scenario = calc_building_demand_profile_share(
            df_cts_buildings, scenario=scenario, sector="heat"
        )
        log.info(f"Profile share for {scenario} calculated!")

        df_demand_share = pd.concat(
            [df_demand_share, df_demand_share_per_scenario],
            axis=0,
            ignore_index=True,
        )

    df_demand_share.rename(columns={"id": "building_id"}, inplace=True)

    write_table_to_postgres(
        df_demand_share,
        EgonCtsHeatDemandBuildingShare,
        drop=True,
    )
    log.info("Profile share exported to DB!")


def get_cts_electricity_peak_load():
    """
    Get the electricity peak load of all CTS buildings for each configured
    scenario and store it in the database.
    """
    log.info("Start logging!")

    BuildingElectricityPeakLoads.__table__.create(bind=engine, checkfirst=True)

    # Delete rows with cts demand
    with db.session_scope() as session:
        session.query(BuildingElectricityPeakLoads).filter(
            BuildingElectricityPeakLoads.sector == "cts"
        ).delete()
    log.info("Cts electricity peak load removed from DB!")

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        with db.session_scope() as session:
            cells_query = session.query(
                EgonCtsElectricityDemandBuildingShare
            ).filter(
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
            )

        df_demand_share = pd.read_sql(
            cells_query.statement, cells_query.session.bind, index_col=None
        )

        with db.session_scope() as session:
            cells_query = session.query(EgonEtragoElectricityCts).filter(
                EgonEtragoElectricityCts.scn_name == scenario
            )

        df_cts_profiles = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )
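        # Pivot the long-format query result into a wide frame with one
        # column per bus_id and one row per timestep of the p_set series.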
        df_cts_profiles = pd.DataFrame.from_dict(
            df_cts_profiles.set_index("bus_id")["p_set"].to_dict(),
            orient="columns",
        )

        df_peak_load = pd.merge(
            left=df_cts_profiles.max().astype(float).rename("max"),
            right=df_demand_share,
            left_index=True,
            right_on="bus_id",
        )

        # Convert unit from MW to W
        df_peak_load["max"] = df_peak_load["max"] * 1e6
        df_peak_load["peak_load_in_w"] = (
            df_peak_load["max"] * df_peak_load["profile_share"]
        )
        log.info(f"Peak load for {scenario} determined!")

        # TODO remove after #772
        df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
        df_peak_load["sector"] = "cts"

        # Write peak loads into db
        write_table_to_postgres(
            df_peak_load,
            BuildingElectricityPeakLoads,
            drop=False,
            index=False,
            if_exists="append",
        )

        log.info(f"Peak load for {scenario} exported to DB!")


def get_cts_heat_peak_load():
    """
    Get the heat peak load of all CTS buildings for each configured
    scenario and store it in the database.
    """
    log.info("Start logging!")

    BuildingHeatPeakLoads.__table__.create(bind=engine, checkfirst=True)

    # Delete rows with cts demand
    with db.session_scope() as session:
        session.query(BuildingHeatPeakLoads).filter(
            BuildingHeatPeakLoads.sector == "cts"
        ).delete()
    log.info("Cts heat peak load removed from DB!")

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        with db.session_scope() as session:
            cells_query = session.query(
                EgonCtsHeatDemandBuildingShare
            ).filter(
                EgonCtsHeatDemandBuildingShare.scenario == scenario
            )

        df_demand_share = pd.read_sql(
            cells_query.statement, cells_query.session.bind, index_col=None
        )
        log.info(f"Retrieved demand share for scenario: {scenario}")

        with db.session_scope() as session:
            cells_query = session.query(EgonEtragoHeatCts).filter(
                EgonEtragoHeatCts.scn_name == scenario
            )

        df_cts_profiles = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )
        log.info(f"Retrieved substation profiles for scenario: {scenario}")

        df_cts_profiles = pd.DataFrame.from_dict(
            df_cts_profiles.set_index("bus_id")["p_set"].to_dict(),
            orient="columns",
        )

        df_peak_load = pd.merge(
            left=df_cts_profiles.max().astype(float).rename("max"),
            right=df_demand_share,
            left_index=True,
            right_on="bus_id",
        )

        # Convert unit from MW to W
        df_peak_load["max"] = df_peak_load["max"] * 1e6
        df_peak_load["peak_load_in_w"] = (
            df_peak_load["max"] * df_peak_load["profile_share"]
        )
        log.info(f"Peak load for {scenario} determined!")

        # TODO remove after #772
        df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
        df_peak_load["sector"] = "cts"

        # Write peak loads into db
        write_table_to_postgres(
            df_peak_load,
            BuildingHeatPeakLoads,
            drop=False,
            index=False,
            if_exists="append",
        )

        log.info(f"Peak load for {scenario} exported to DB!")


def assign_voltage_level_to_buildings():
    """
    Add voltage level to all buildings by summed peak demand.

    All entries with the same building id get the voltage level corresponding
    to their summed residential and CTS peak demand.
    """

    with db.session_scope() as session:
        cells_query = session.query(BuildingElectricityPeakLoads)

        df_peak_loads = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )

    df_peak_load_buildings = df_peak_loads.groupby(
        ["building_id", "scenario"]
    )["peak_load_in_w"].sum()
    df_peak_load_buildings = df_peak_load_buildings.to_frame()
    df_peak_load_buildings.loc[:, "voltage_level"] = 0

    # Identify voltage_level by thresholds defined in the eGon project
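    # Thresholds (peak load in MW): <= 0.1 -> 7, > 0.1 -> 6, > 0.2 -> 5,
    # > 5.5 -> 4, > 20 -> 3, > 120 -> 1. Later assignments overwrite earlier
    # ones, so the last matching threshold wins; level 2 is not assigned.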
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] <= 0.1 * 1e6, "voltage_level"
    ] = 7
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 0.1 * 1e6, "voltage_level"
    ] = 6
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 0.2 * 1e6, "voltage_level"
    ] = 5
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 5.5 * 1e6, "voltage_level"
    ] = 4
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 20 * 1e6, "voltage_level"
    ] = 3
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 120 * 1e6, "voltage_level"
    ] = 1

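    # Broadcast the per-building voltage level back onto every
    # (building_id, scenario) row of the original peak load table.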
    df_peak_load = pd.merge(
        left=df_peak_loads.drop(columns="voltage_level"),
        right=df_peak_load_buildings["voltage_level"],
        how="left",
        left_on=["building_id", "scenario"],
        right_index=True,
    )

    # Write peak loads into db
    # Drop the existing table and replace it with the new data
    write_table_to_postgres(
        df_peak_load,
        BuildingElectricityPeakLoads,
        drop=True,
        index=False,
        if_exists="append",
    )