"""
CTS electricity and heat demand time series for the 2035 and 2050 scenarios
are generated and assigned to OSM buildings.

CTS heat and electricity demand time series are disaggregated from the MV
substation to census cells via the annual demand and then to OSM buildings
via amenity tags, or randomly if insufficient OSM data is available in the
respective census cell. If neither OSM buildings nor synthetic residential
buildings are available, new synthetic 5x5 m buildings are generated.
"""

from geoalchemy2 import Geometry
from geoalchemy2.shape import to_shape
from psycopg2.extensions import AsIs, register_adapter
from sqlalchemy import REAL, Column, Integer, String, func, cast
from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd
import numpy as np
import pandas as pd
import saio

from egon.data import config, db
from egon.data import logger as log
from egon.data.datasets import Dataset
from egon.data.datasets.electricity_demand import (
    EgonDemandRegioZensusElectricity,
)
from egon.data.datasets.electricity_demand.temporal import (
    EgonEtragoElectricityCts,
)
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
    BuildingElectricityPeakLoads,
    OsmBuildingsSynthetic,
)
from egon.data.datasets.electricity_demand_timeseries.mapping import (
    map_all_used_buildings,
)
from egon.data.datasets.electricity_demand_timeseries.tools import (
    random_ints_until_sum,
    random_point_in_square,
    specific_int_until_sum,
    write_table_to_postgis,
    write_table_to_postgres,
)
from egon.data.datasets.heat_demand import EgonPetaHeat
from egon.data.datasets.heat_demand_timeseries import EgonEtragoHeatCts
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa

engine = db.engine()
Base = declarative_base()

# import db tables
saio.register_schema("openstreetmap", engine=engine)
saio.register_schema("boundaries", engine=engine)

class EgonCtsElectricityDemandBuildingShare(Base):
    """
    Class definition of table demand.egon_cts_electricity_demand_building_share.

    Table including the MV substation electricity profile share of all selected
    CTS buildings for the scenarios eGon2035 and eGon100RE. This table is
    created within :func:`cts_electricity()`.
    """
    __tablename__ = "egon_cts_electricity_demand_building_share"
    __table_args__ = {"schema": "demand"}

    building_id = Column(Integer, primary_key=True)
    scenario = Column(String, primary_key=True)
    bus_id = Column(Integer, index=True)
    profile_share = Column(REAL)


class EgonCtsHeatDemandBuildingShare(Base):
    """
    Class definition of table demand.egon_cts_heat_demand_building_share.

    Table including the MV substation heat profile share of all selected
    CTS buildings for the scenarios eGon2035 and eGon100RE. This table is
    created within :func:`cts_heat()`.
    """
    __tablename__ = "egon_cts_heat_demand_building_share"
    __table_args__ = {"schema": "demand"}

    building_id = Column(Integer, primary_key=True)
    scenario = Column(String, primary_key=True)
    bus_id = Column(Integer, index=True)
    profile_share = Column(REAL)


class CtsBuildings(Base):
    """
    Class definition of table openstreetmap.egon_cts_buildings.

    Table of all selected CTS buildings with id, census cell id, geometry and
    amenity count in building. This table is created within
    :func:`cts_buildings()`.
    """
    __tablename__ = "egon_cts_buildings"
    __table_args__ = {"schema": "openstreetmap"}

    serial = Column(Integer, primary_key=True)
    id = Column(Integer, index=True)
    zensus_population_id = Column(Integer, index=True)
    geom_building = Column(Geometry("Polygon", 3035))
    n_amenities_inside = Column(Integer)
    source = Column(String)

class BuildingHeatPeakLoads(Base):
    """
    Class definition of table demand.egon_building_heat_peak_loads.

    """
    __tablename__ = "egon_building_heat_peak_loads"
    __table_args__ = {"schema": "demand"}

    building_id = Column(Integer, primary_key=True)
    scenario = Column(String, primary_key=True)
    sector = Column(String, primary_key=True)
    peak_load_in_w = Column(REAL)

class CtsDemandBuildings(Dataset):
    """
    Generates CTS electricity and heat demand time series for the 2035 and
    2050 scenarios and assigns them to OSM buildings.

    CTS heat and electricity demand time series are disaggregated from the
    HV-MV substation to census cells via the annual demand per census cell
    and then to OSM buildings via amenity tags, or randomly if insufficient
    OSM data is available in the respective census cell. If neither OSM
    buildings nor synthetic residential buildings are available, new
    synthetic 5x5 m buildings are generated.

    For more information see data documentation on :ref:`disagg-cts-elec-ref`.

    *Dependencies*
      * :py:class:`OsmBuildingsStreets <egon.data.datasets.osm_buildings_streets.OsmBuildingsStreets>`
      * :py:class:`CtsElectricityDemand <egon.data.datasets.electricity_demand.CtsElectricityDemand>`
      * :py:class:`hh_buildings <egon.data.datasets.electricity_demand_timeseries.hh_buildings>`
      * :py:class:`HeatTimeSeries <egon.data.datasets.heat_demand_timeseries.HeatTimeSeries>` (more specifically the :func:`export_etrago_cts_heat_profiles <egon.data.datasets.heat_demand_timeseries.export_etrago_cts_heat_profiles>` task)

    *Resulting tables*
      * :py:class:`openstreetmap.osm_buildings_synthetic <egon.data.datasets.electricity_demand_timeseries.hh_buildings.OsmBuildingsSynthetic>` is extended
      * :py:class:`openstreetmap.egon_cts_buildings <egon.data.datasets.electricity_demand_timeseries.cts_buildings.CtsBuildings>` is created
      * :py:class:`demand.egon_cts_electricity_demand_building_share <egon.data.datasets.electricity_demand_timeseries.cts_buildings.EgonCtsElectricityDemandBuildingShare>` is created
      * :py:class:`demand.egon_cts_heat_demand_building_share <egon.data.datasets.electricity_demand_timeseries.cts_buildings.EgonCtsHeatDemandBuildingShare>` is created
      * :py:class:`demand.egon_building_electricity_peak_loads <egon.data.datasets.electricity_demand_timeseries.hh_buildings.BuildingElectricityPeakLoads>` is extended
      * :py:class:`boundaries.egon_map_zensus_mvgd_buildings <egon.data.datasets.electricity_demand_timeseries.mapping.EgonMapZensusMvgdBuildings>` is extended.

    **The following datasets from the database are mainly used for creation:**

    * `openstreetmap.osm_buildings_filtered`:
       Table of OSM buildings filtered by tags to select residential and CTS
       buildings only.
    * `openstreetmap.osm_amenities_shops_filtered`:
       Table of OSM amenities filtered by tags to select CTS amenities only.
    * `openstreetmap.osm_amenities_not_in_buildings_filtered`:
       Table of amenities which do not intersect with any building from
       `openstreetmap.osm_buildings_filtered`.
    * `openstreetmap.osm_buildings_synthetic`:
       Table of synthetic residential buildings.
    * `boundaries.egon_map_zensus_buildings_filtered_all`:
       Mapping table of census cells and filtered buildings, including
       buildings in census cells with a population of 0.
    * `demand.egon_demandregio_zensus_electricity`:
       Table of annual electricity demand for residential and CTS at census
       cell level. Residential demand is derived from aggregated residential
       building profiles. DemandRegio CTS demand at NUTS3 level is distributed
       to census cells linearly to the heat demand from Peta5.
    * `demand.egon_peta_heat`:
       Table of annual heat demand for residential and CTS at census cell
       level from Peta5.
    * `demand.egon_etrago_electricity_cts`:
       Scaled CTS electricity time series for every MV substation. Derived
       from DemandRegio SLP for selected economic sectors at NUTS3 level.
       Scaled with the annual demand from
       `demand.egon_demandregio_zensus_electricity`.
    * `demand.egon_etrago_heat_cts`:
       Scaled CTS heat time series for every MV substation. Derived from
       DemandRegio SLP Gas for selected economic sectors at NUTS3 level.
       Scaled with the annual demand from `demand.egon_peta_heat`.

    **What is the challenge?**

    The OSM, DemandRegio and Peta5 datasets differ from each other. The OSM
    dataset is a community-based dataset which is continuously extended and
    does not claim to be complete. Therefore, not all census cells that have
    a demand assigned by the DemandRegio or Peta5 methodology also contain
    buildings with the respective tags, or sometimes any buildings at all.
    Furthermore, the substation load areas are determined dynamically in a
    previous dataset. When merging these datasets, their different scopes
    (census cell shapes, building shapes) and their inconsistencies need to
    be addressed. For example: buildings or amenities not yet tagged in OSM,
    or building shapes exceeding census cells.

    **What are central assumptions during the data processing?**

    * We assume OSM data to be the most reliable and complete open source dataset.
    * We assume building and amenity tags to be truthful and accurate.
    * Mapping census data to OSM data is not trivial. Discrepancies are substituted.
    * Missing OSM buildings are generated for each amenity.
    * Missing amenities are generated using the median number of amenities
      per census cell.

    **Drawbacks and limitations of the data**

    * The profile shape is the same for all buildings within an MVGD and is
      only scaled with a different factor per building.
    * MVGDs are generated dynamically. For buildings with amenities exceeding
      MVGD borders, amenities which are assigned to a different MVGD than the
      building centroid are dropped for the sake of simplicity, as one
      building should not be connected to two MVGDs.
    * The completeness of the OSM data depends on community contributions and
      is crucial to the quality of our results.
    * Randomly selected buildings and generated amenities may inadequately
      reflect reality, but are chosen for the sake of simplicity as a measure
      to fill data gaps.
    * Since this dataset runs as a cascade after the generation of synthetic
      residential buildings, also check the drawbacks and limitations in
      hh_buildings.py.
    * Synthetic buildings may be placed within OSM buildings which span
      multiple census cells. This is currently accepted but may be solved in
      #953.
    * Scattered high peak loads occur and might lead to single MV grid
      connections in ding0. In some cases this might not be viable.
      Postprocessing is needed and may be solved in #954.

    """

    #:
    name: str = "CtsDemandBuildings"
    #:
    version: str = "0.0.4"

    def __init__(self, dependencies):
        super().__init__(
            name=self.name,
            version=self.version,
            dependencies=dependencies,
            tasks=(
                cts_buildings,  # TODO: status2023, currently fixed for only 2023
                {cts_electricity, cts_heat},
                get_cts_electricity_peak_load,
                map_all_used_buildings,
                assign_voltage_level_to_buildings,
            ),
        )

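
# The following helper is a purely illustrative sketch and is not called by
# the pipeline. It shows, with made-up numbers, how the shares computed in
# this module compose: the census cell share of the substation's annual
# demand (calc_census_cell_share) times the building's amenity share within
# the cell (calc_building_demand_profile_share) gives the profile share that
# scales the substation time series in calc_cts_building_profiles.
def _example_profile_share_composition():
    """Toy example of the disaggregation arithmetic (assumed values only)."""
    # Substation (bus) CTS profile, shortened to 4 time steps instead of 8760
    substation_profile = pd.Series([10.0, 20.0, 30.0, 40.0], name="p_set")

    # Two census cells share the bus demand 70 % / 30 % (cell_share).
    # Cell 1 contains two buildings with 3 and 1 amenities (shares 0.75/0.25),
    # cell 2 contains a single building (share 1.0).
    profile_share = pd.Series(
        {
            "building_1": 0.7 * 0.75,
            "building_2": 0.7 * 0.25,
            "building_3": 0.3 * 1.0,
        }
    )

    # Each building profile is the outer product of time series and share,
    # as done in calc_cts_building_profiles()
    building_profiles = pd.DataFrame(
        np.outer(substation_profile, profile_share),
        columns=profile_share.index,
    )
    # The building profiles add up to the substation profile again
    assert np.allclose(building_profiles.sum(axis=1), substation_profile)
    return building_profiles
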
def amenities_without_buildings():
247
    """
248
    Amenities which have no buildings assigned and are in a cell with cts
249
    demand are determined.
250
251
    Returns
252
    -------
253
    pd.DataFrame
254
        Table of amenities without buildings
255
    """
256
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
257
258
    with db.session_scope() as session:
259
        scn_query = session.query(
260
            func.distinct(EgonDemandRegioZensusElectricity.scenario)
261
        )
262
263
    # FIXME: Cells with CTS demand, amenities and buildings do not change within the
264
    #  scenarios, only the demand itself. Therefore any scenario can be used
265
    #  universally to determine the cts buildings but not for the demand share.
266
    scn = db.select_dataframe(scn_query.statement).iat[0, 0]
267
268
    with db.session_scope() as session:
269
        cells_query = (
270
            session.query(
271
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
272
                osm_amenities_not_in_buildings_filtered.geom_amenity,
273
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
274
            )
275
            .filter(
276
                func.st_within(
277
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
278
                    DestatisZensusPopulationPerHa.geom,
279
                )
280
            )
281
            .filter(
282
                DestatisZensusPopulationPerHa.id
283
                == EgonDemandRegioZensusElectricity.zensus_population_id
284
            )
285
            .filter(
286
                EgonDemandRegioZensusElectricity.sector == "service",
287
                EgonDemandRegioZensusElectricity.scenario == scn,
288
            )
289
        )
290
291
    df_amenities_without_buildings = gpd.read_postgis(
292
        cells_query.statement,
293
        cells_query.session.bind,
294
        geom_col="geom_amenity",
295
    )
296
    return df_amenities_without_buildings
297
298
299
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
    """
    Building centroids are placed randomly within census cells.
    The number of buildings is derived from n_amenities_inside, the selected
    method and the number of amenities per building.

    Returns
    -------
    df: gpd.GeoDataFrame
        Table of building centroids
    """
310
    if isinstance(max_amenities, int):
311
        # amount of amenities is randomly generated within bounds
312
        # (max_amenities, amenities per cell)
313
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
314
            random_ints_until_sum, args=[max_amenities]
315
        )
316
    if isinstance(amenities, int):
317
        # Specific amount of amenities per building
318
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
319
            specific_int_until_sum, args=[amenities]
320
        )
321
322
    # Unnest each building
323
    df = df.explode(column="n_amenities_inside")
324
325
    # building count per cell
326
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
327
328
    # generate random synthetic buildings
329
    edge_length = 5
330
    # create random points within census cells
331
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
332
333
    df.reset_index(drop=True, inplace=True)
334
    # Store center of polygon
335
    df["geom_point"] = points
336
    # Drop geometry of census cell
337
    df = df.drop(columns=["geom"])
338
339
    return df
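
# Illustrative sketch only (not part of the pipeline): how a cell's amenity
# count is unnested into one row per synthetic building, mirroring the
# explode/cumcount step above. The per-building amenity counts are assumed
# here; in the pipeline they come from specific_int_until_sum or
# random_ints_until_sum.
def _example_unnest_amenities():
    toy = pd.DataFrame(
        {
            "zensus_population_id": [1, 2],
            # one amenity per building, as used for cells with demand only
            "n_amenities_inside": [[1, 1, 1], [1]],
        }
    )
    toy = toy.explode(column="n_amenities_inside")
    toy["building_count"] = toy.groupby("zensus_population_id").cumcount() + 1
    # cell 1 -> three buildings (building_count 1..3), cell 2 -> one building
    return toy
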
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
343
    """
344
    Synthetic buildings are generated around points.
345
346
    Parameters
347
    ----------
348
    df: pd.DataFrame
349
        Table of census cells
350
    points: gpd.GeoSeries or str
351
        List of points to place buildings around or column name of df
352
    crs: str
353
        CRS of result table
354
355
    Returns
356
    -------
357
    df: gpd.GeoDataFrame
358
        Synthetic buildings
359
    """
360
361
    if isinstance(points, str) and points in df.columns:
362
        points = df[points]
363
    elif isinstance(points, gpd.GeoSeries):
364
        pass
365
    else:
366
        raise ValueError("Points are of the wrong type")
367
368
    # Create building using a square around point
369
    edge_length = 5
370
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
371
372
    if "geom_point" not in df.columns:
373
        df["geom_point"] = df["geom_building"].centroid
374
375
    df = gpd.GeoDataFrame(
376
        df,
377
        crs=crs,
378
        geometry="geom_building",
379
    )
380
381
    # TODO remove after #772 implementation of egon_building_id
382
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
383
384
    # get max number of building ids from synthetic table
385
    with db.session_scope() as session:
386
        max_synth_building_id = session.execute(
387
            func.max(cast(OsmBuildingsSynthetic.id, Integer))
388
        ).scalar()
389
    max_synth_building_id = int(max_synth_building_id)
390
391
    # create sequential ids
392
    df["egon_building_id"] = range(
393
        max_synth_building_id + 1,
394
        max_synth_building_id + df.shape[0] + 1,
395
    )
396
397
    df["area"] = df["geom_building"].area
398
    # set building type of synthetic building
399
    df["building"] = "cts"
400
    # TODO remove after #772
401
    df = df.rename(
402
        columns={
403
            # "zensus_population_id": "cell_id",
404
            "egon_building_id": "id",
405
        }
406
    )
407
    return df
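
# Illustrative sketch only: a 5 m x 5 m footprint is created as a square
# buffer around a point (cap_style=3 yields square caps), just like the
# buffer call above with edge_length = 5. The point used here is made up.
def _example_square_footprint():
    from shapely.geometry import Point

    point = Point(0, 0)
    footprint = point.buffer(distance=2.5, cap_style=3)
    # bounds are (-2.5, -2.5, 2.5, 2.5), i.e. an edge length of 5 m
    return footprint
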
def buildings_with_amenities():
    """
    Amenities which are assigned to buildings are determined and grouped per
    building and zensus cell. Buildings covering multiple cells therefore
    exist multiple times, but in different zensus cells. This is necessary to
    cover as many cells with a cts demand as possible. If buildings exist in
    multiple MVGDs (bus_id), only the amenities within the same MVGD as the
    building centroid are kept. If, as a result, a census cell is no longer
    covered by any building, a synthetic amenity is placed in it. The
    buildings are aggregated afterwards during the calculation of the
    profile_share.

    Returns
    -------
    df_buildings_with_amenities: gpd.GeoDataFrame
        Contains all buildings with amenities per zensus cell.
    df_lost_cells: gpd.GeoDataFrame
        Contains synthetic amenities in lost cells. Might be empty.
    """
428
429
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
430
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
431
432
    for scn in config.settings()["egon-data"]["--scenarios"]:
433
        with db.session_scope() as session:
434
            cells_query = (
435
                session.query(
436
                    osm_amenities_in_buildings_filtered,
437
                    MapZensusGridDistricts.bus_id,
438
                )
439
                .filter(
440
                    MapZensusGridDistricts.zensus_population_id
441
                    == osm_amenities_in_buildings_filtered.zensus_population_id
442
                )
443
                .filter(
444
                    EgonDemandRegioZensusElectricity.zensus_population_id
445
                    == osm_amenities_in_buildings_filtered.zensus_population_id
446
                )
447
                .filter(
448
                    EgonDemandRegioZensusElectricity.sector == "service",
449
                    EgonDemandRegioZensusElectricity.scenario == scn,
450
                )
451
            )
452
            df_amenities_in_buildings = pd.read_sql(
453
                cells_query.statement, con=session.connection(), index_col=None
454
            )
455
        if df_amenities_in_buildings.empty:
456
            break
457
458
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
459
        "geom_building"
460
    ].apply(to_shape)
461
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
462
        "geom_amenity"
463
    ].apply(to_shape)
464
465
    # retrieve building centroid bus_id
466
    with db.session_scope() as session:
467
        cells_query = session.query(
468
            egon_map_zensus_buildings_filtered_all.id,
469
            MapZensusGridDistricts.bus_id.label("building_bus_id"),
470
        ).filter(
471
            egon_map_zensus_buildings_filtered_all.zensus_population_id
472
            == MapZensusGridDistricts.zensus_population_id
473
        )
474
475
        df_building_bus_id = pd.read_sql(
476
            cells_query.statement, con=session.connection(), index_col=None
477
        )
478
479
    df_amenities_in_buildings = pd.merge(
480
        left=df_amenities_in_buildings, right=df_building_bus_id, on="id"
481
    )
482
483
    # identify amenities with differing bus_id as building
484
    identified_amenities = df_amenities_in_buildings.loc[
485
        df_amenities_in_buildings["bus_id"]
486
        != df_amenities_in_buildings["building_bus_id"]
487
    ].index
488
489
    lost_cells = df_amenities_in_buildings.loc[
490
        identified_amenities, "zensus_population_id"
491
    ].unique()
492
493
    # check if lost zensus cells are already covered
494
    if not (
495
        df_amenities_in_buildings["zensus_population_id"]
496
        .isin(lost_cells)
497
        .empty
498
    ):
499
        # query geom data for cell if not
500
        with db.session_scope() as session:
501
            cells_query = session.query(
502
                DestatisZensusPopulationPerHa.id,
503
                DestatisZensusPopulationPerHa.geom,
504
            ).filter(
505
                DestatisZensusPopulationPerHa.id.in_(pd.Index(lost_cells))
506
            )
507
508
            df_lost_cells = gpd.read_postgis(
509
                cells_query.statement,
510
                cells_query.session.bind,
511
                geom_col="geom",
512
            )
513
514
        # place random amenity in cell
515
        df_lost_cells["n_amenities_inside"] = 1
516
        df_lost_cells.rename(
517
            columns={
518
                "id": "zensus_population_id",
519
            },
520
            inplace=True,
521
        )
522
        df_lost_cells = place_buildings_with_amenities(
523
            df_lost_cells, amenities=1
524
        )
525
        df_lost_cells.rename(
526
            columns={
527
                # "id": "zensus_population_id",
528
                "geom_point": "geom_amenity",
529
            },
530
            inplace=True,
531
        )
532
        df_lost_cells.drop(
533
            columns=["building_count", "n_amenities_inside"], inplace=True
534
        )
535
    else:
536
        df_lost_cells = None
537
538
    df_amenities_in_buildings.drop(identified_amenities, inplace=True)
539
    df_amenities_in_buildings.drop(columns="building_bus_id", inplace=True)
540
541
    df_amenities_in_buildings["n_amenities_inside"] = 1
542
543
    # sum amenities per building and cell
544
    df_amenities_in_buildings["n_amenities_inside"] = (
545
        df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
546
            "n_amenities_inside"
547
        ].transform("sum")
548
    )
549
    # drop duplicated buildings
550
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
551
        ["id", "zensus_population_id"]
552
    )
553
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
554
555
    df_buildings_with_amenities = df_buildings_with_amenities[
556
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
557
    ]
558
    df_buildings_with_amenities.rename(
559
        columns={
560
            # "zensus_population_id": "cell_id",
561
            "egon_building_id": "id"
562
        },
563
        inplace=True,
564
    )
565
566
    return df_buildings_with_amenities, df_lost_cells
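
# Illustrative sketch only (made-up ids): amenities whose census cell belongs
# to a different MV grid district (bus_id) than the building centroid
# (building_bus_id) are dropped, so that no building is split between two
# substations. This mirrors the identified_amenities step above.
def _example_drop_foreign_mvgd_amenities():
    toy = pd.DataFrame(
        {
            "id": [10, 10, 11],  # building ids
            "bus_id": [1, 2, 1],  # MVGD of the amenity's census cell
            "building_bus_id": [1, 1, 1],  # MVGD of the building centroid
        }
    )
    foreign = toy.loc[toy["bus_id"] != toy["building_bus_id"]].index
    # drops the second amenity of building 10
    return toy.drop(foreign)
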
def buildings_without_amenities():
570
    """
571
    Buildings (filtered and synthetic) in cells with
572
    cts demand but no amenities are determined.
573
574
    Returns
575
    -------
576
    df_buildings_without_amenities: gpd.GeoDataFrame
577
        Table of buildings without amenities in zensus cells
578
        with cts demand.
579
    """
580
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
581
    from saio.openstreetmap import (
582
        osm_amenities_shops_filtered,
583
        osm_buildings_filtered,
584
        osm_buildings_synthetic,
585
    )
586
587
    with db.session_scope() as session:
588
        scn_query = session.query(
589
            func.distinct(EgonDemandRegioZensusElectricity.scenario)
590
        )
591
592
    # FIXME: Cells with CTS demand, amenities and buildings do not change within the
593
    #  scenarios, only the demand itself. Therefore any scenario can be used
594
    #  universally to determine the cts buildings but not for the demand share.
595
    scn = db.select_dataframe(scn_query.statement).iat[0, 0]
596
597
    # buildings_filtered in cts-demand-cells without amenities
598
    with db.session_scope() as session:
599
        # Synthetic Buildings
600
        q_synth_buildings = session.query(
601
            osm_buildings_synthetic.cell_id.cast(Integer).label(
602
                "zensus_population_id"
603
            ),
604
            osm_buildings_synthetic.id.cast(Integer).label("id"),
605
            osm_buildings_synthetic.area.label("area"),
606
            osm_buildings_synthetic.geom_building.label("geom_building"),
607
            osm_buildings_synthetic.geom_point.label("geom_point"),
608
        )
609
610
        # Buildings filtered
611
        q_buildings_filtered = session.query(
612
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
613
            osm_buildings_filtered.id,
614
            osm_buildings_filtered.area,
615
            osm_buildings_filtered.geom_building,
616
            osm_buildings_filtered.geom_point,
617
        ).filter(
618
            osm_buildings_filtered.id
619
            == egon_map_zensus_buildings_filtered_all.id
620
        )
621
622
        # Amenities + zensus_population_id
623
        q_amenities = (
624
            session.query(
625
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
626
            )
627
            .filter(
628
                func.st_within(
629
                    osm_amenities_shops_filtered.geom_amenity,
630
                    DestatisZensusPopulationPerHa.geom,
631
                )
632
            )
633
            .distinct(DestatisZensusPopulationPerHa.id)
634
        )
635
636
        # Cells with CTS demand but without amenities
637
        q_cts_without_amenities = (
638
            session.query(
639
                EgonDemandRegioZensusElectricity.zensus_population_id,
640
            )
641
            .filter(
642
                EgonDemandRegioZensusElectricity.sector == "service",
643
                EgonDemandRegioZensusElectricity.scenario == scn,
644
            )
645
            .filter(
646
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
647
                    q_amenities
648
                )
649
            )
650
            .distinct()
651
        )
652
653
        # Buildings filtered + synthetic buildings residential in
654
        # cells with CTS demand but without amenities
655
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
656
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
657
                q_cts_without_amenities
658
            )
659
        )
660
661
    # df_buildings_without_amenities = pd.read_sql(
662
    #     cells_query.statement, cells_query.session.bind, index_col=None)
663
    df_buildings_without_amenities = gpd.read_postgis(
664
        cells_query.statement,
665
        cells_query.session.bind,
666
        geom_col="geom_building",
667
    )
668
669
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
670
        columns={
671
            # "zensus_population_id": "cell_id",
672
            "egon_building_id": "id",
673
        }
674
    )
675
676
    return df_buildings_without_amenities
677
678
679
def select_cts_buildings(df_buildings_wo_amenities, max_n):
    """
    Buildings (filtered and synthetic) in each cell with CTS demand are
    selected. The buildings are sorted by surface area and only the first
    max_n buildings are taken for each cell.

    Returns
    -------
    df_buildings_with_cts_demand: gpd.GeoDataFrame
        Table of selected buildings
    """
691
692
    df_buildings_wo_amenities.sort_values(
693
        "area", ascending=False, inplace=True
694
    )
695
    # select first n ids each census cell if available
696
    df_buildings_with_cts_demand = (
697
        df_buildings_wo_amenities.groupby("zensus_population_id")
698
        .nth(list(range(max_n)))
699
        .reset_index()
700
    )
701
    df_buildings_with_cts_demand.reset_index(drop=True, inplace=True)
702
703
    return df_buildings_with_cts_demand
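
# Illustrative sketch only: at most max_n (here 2) buildings are kept per
# census cell after sorting by area, analogous to the groupby().nth() call
# above. Ids and areas are made up.
def _example_select_largest_buildings():
    toy = pd.DataFrame(
        {
            "zensus_population_id": [1, 1, 1, 2],
            "id": [10, 11, 12, 13],
            "area": [50.0, 120.0, 80.0, 30.0],
        }
    ).sort_values("area", ascending=False)
    # keeps buildings 11 and 12 for cell 1 and building 13 for cell 2
    return toy.groupby("zensus_population_id").nth(list(range(2)))
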
def cells_with_cts_demand_only(df_buildings_without_amenities):
    """
    Cells with CTS demand but no amenities or buildings are determined.

    Returns
    -------
    df_cells_only_cts_demand: gpd.GeoDataFrame
        Table of cells with CTS demand but no amenities or buildings
    """
716
    from saio.openstreetmap import osm_amenities_shops_filtered
717
718
    with db.session_scope() as session:
719
        scn_query = session.query(
720
            func.distinct(EgonDemandRegioZensusElectricity.scenario)
721
        )
722
723
    # FIXME: Cells with CTS demand, amenities and buildings do not change within the
724
    #  scenarios, only the demand itself. Therefore any scenario can be used
725
    #  universally to determine the cts buildings but not for the demand share.
726
    scn = db.select_dataframe(scn_query.statement).iat[0, 0]
727
728
    # cells mit amenities
729
    with db.session_scope() as session:
730
        sub_query = (
731
            session.query(
732
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
733
            )
734
            .filter(
735
                func.st_within(
736
                    osm_amenities_shops_filtered.geom_amenity,
737
                    DestatisZensusPopulationPerHa.geom,
738
                )
739
            )
740
            .distinct(DestatisZensusPopulationPerHa.id)
741
        )
742
743
        cells_query = (
744
            session.query(
745
                EgonDemandRegioZensusElectricity.zensus_population_id,
746
                EgonDemandRegioZensusElectricity.scenario,
747
                EgonDemandRegioZensusElectricity.sector,
748
                EgonDemandRegioZensusElectricity.demand,
749
                DestatisZensusPopulationPerHa.geom,
750
            )
751
            .filter(
752
                EgonDemandRegioZensusElectricity.sector == "service",
753
                EgonDemandRegioZensusElectricity.scenario == scn,
754
            )
755
            .filter(
756
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
757
                    sub_query
758
                )
759
            )
760
            .filter(
761
                EgonDemandRegioZensusElectricity.zensus_population_id
762
                == DestatisZensusPopulationPerHa.id
763
            )
764
        )
765
766
    df_cts_cell_without_amenities = gpd.read_postgis(
767
        cells_query.statement,
768
        cells_query.session.bind,
769
        geom_col="geom",
770
        index_col=None,
771
    )
772
773
    # TODO remove after #722
774
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
775
        columns={"cell_id": "zensus_population_id"}
776
    )
777
778
    # Census cells with only cts demand
779
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
780
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
781
            df_buildings_without_amenities["zensus_population_id"].unique()
782
        )
783
    ]
784
785
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
786
787
    return df_cells_only_cts_demand
788
789
790
def calc_census_cell_share(scenario, sector):
    """
    The profile share for each census cell is calculated by its share of
    annual demand per substation bus. The annual demand per cell is defined
    by DemandRegio/Peta5. The share is identical for both scenarios, as the
    annual demand is scaled linearly.

    Parameters
    ----------
    scenario: str
        Scenario for which the share is calculated: "eGon2035" or "eGon100RE"
    sector: str
        Sector for which the share is calculated: "electricity" or "heat"

    Returns
    -------
    df_census_share: pd.DataFrame
    """
808
    if sector == "electricity":
809
        with db.session_scope() as session:
810
            cells_query = (
811
                session.query(
812
                    EgonDemandRegioZensusElectricity,
813
                    MapZensusGridDistricts.bus_id,
814
                )
815
                .filter(EgonDemandRegioZensusElectricity.sector == "service")
816
                .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
817
                .filter(
818
                    EgonDemandRegioZensusElectricity.zensus_population_id
819
                    == MapZensusGridDistricts.zensus_population_id
820
                )
821
            )
822
823
    elif sector == "heat":
824
        with db.session_scope() as session:
825
            cells_query = (
826
                session.query(EgonPetaHeat, MapZensusGridDistricts.bus_id)
827
                .filter(EgonPetaHeat.sector == "service")
828
                .filter(EgonPetaHeat.scenario == scenario)
829
                .filter(
830
                    EgonPetaHeat.zensus_population_id
831
                    == MapZensusGridDistricts.zensus_population_id
832
                )
            )
    else:
        raise KeyError("Sector needs to be either 'electricity' or 'heat'")
834
835
    df_demand = pd.read_sql(
836
        cells_query.statement,
837
        cells_query.session.bind,
838
        index_col="zensus_population_id",
839
    )
840
841
    # get demand share of cell per bus
842
    df_census_share = df_demand["demand"] / df_demand.groupby("bus_id")[
843
        "demand"
844
    ].transform("sum")
845
    df_census_share = df_census_share.rename("cell_share")
846
847
    df_census_share = pd.concat(
848
        [
849
            df_census_share,
850
            df_demand[["bus_id", "scenario"]],
851
        ],
852
        axis=1,
853
    )
854
855
    df_census_share.reset_index(inplace=True)
856
    return df_census_share
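
# Illustrative sketch only (made-up demands): each census cell's share is its
# annual demand divided by the summed demand of all cells on the same bus,
# which is exactly the groupby/transform division above.
def _example_cell_share():
    toy = pd.DataFrame(
        {
            "zensus_population_id": [1, 2, 3],
            "bus_id": [100, 100, 200],
            "demand": [30.0, 70.0, 50.0],
        }
    )
    toy["cell_share"] = toy["demand"] / toy.groupby("bus_id")[
        "demand"
    ].transform("sum")
    # cells 1 and 2 get 0.3 and 0.7 of bus 100, cell 3 gets all of bus 200
    return toy
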
def calc_building_demand_profile_share(
    df_cts_buildings, scenario="eGon2035", sector="electricity"
):
    """
    Share of the CTS demand profile per bus for every selected building is
    calculated. The building-amenity share is multiplied with the census cell
    share to get the substation bus profile share for each building. The share
    is grouped and aggregated per building, as some buildings exceed the shape
    of census cells and have amenities assigned from multiple cells. Buildings
    therefore get the amenity share of all their census cells.

    Parameters
    ----------
    df_cts_buildings: gpd.GeoDataFrame
        Table of all buildings with cts demand assigned
    scenario: str
        Scenario for which the share is calculated.
    sector: str
        Sector for which the share is calculated.

    Returns
    -------
    df_building_share: pd.DataFrame
        Table of bus profile share per building

    """

    def calc_building_amenity_share(df_cts_buildings):
        """
        Calculate the building share by the number of amenities per building
        within a census cell. Building ids can exist multiple times but with
        different zensus_population_ids.
        """
892
        df_building_amenity_share = df_cts_buildings[
893
            "n_amenities_inside"
894
        ] / df_cts_buildings.groupby("zensus_population_id")[
895
            "n_amenities_inside"
896
        ].transform(
897
            "sum"
898
        )
899
        df_building_amenity_share = pd.concat(
900
            [
901
                df_building_amenity_share.rename("building_amenity_share"),
902
                df_cts_buildings[["zensus_population_id", "id"]],
903
            ],
904
            axis=1,
905
        )
906
        return df_building_amenity_share
907
908
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
909
910
    df_census_cell_share = calc_census_cell_share(
911
        scenario=scenario, sector=sector
912
    )
913
914
    df_demand_share = pd.merge(
915
        left=df_building_amenity_share,
916
        right=df_census_cell_share,
917
        left_on="zensus_population_id",
918
        right_on="zensus_population_id",
919
    )
920
    df_demand_share["profile_share"] = df_demand_share[
921
        "building_amenity_share"
922
    ].multiply(df_demand_share["cell_share"])
923
924
    # only pass selected columns
925
    df_demand_share = df_demand_share[
926
        ["id", "bus_id", "scenario", "profile_share"]
927
    ]
928
929
    # Group and aggregate per building for multi cell buildings
930
    df_demand_share = (
931
        df_demand_share.groupby(["scenario", "id", "bus_id"])
932
        .sum()
933
        .reset_index()
934
    )
935
    if df_demand_share.duplicated("id", keep=False).any():
936
        print(
937
            df_demand_share.loc[df_demand_share.duplicated("id", keep=False)]
938
        )
939
    return df_demand_share
940
941
def get_peta_demand(mvgd, scenario):
    """
    Retrieve annual peta heat demand for CTS for either
    eGon2035 or eGon100RE scenario.

    Parameters
    ----------
    mvgd : int
        ID of substation for which to get CTS demand.
    scenario : str
        Possible options are eGon2035 or eGon100RE

    Returns
    -------
    df_peta_demand : pd.DataFrame
        Annual CTS heat demand per census cell of the MV grid district for
        the selected scenario. Columns of the dataframe are
        zensus_population_id and demand.

    """
961
962
    with db.session_scope() as session:
963
        query = (
964
            session.query(
965
                MapZensusGridDistricts.zensus_population_id,
966
                EgonPetaHeat.demand,
967
            )
968
            .filter(MapZensusGridDistricts.bus_id == int(mvgd))
969
            .filter(
970
                MapZensusGridDistricts.zensus_population_id
971
                == EgonPetaHeat.zensus_population_id
972
            )
973
            .filter(
974
                EgonPetaHeat.sector == "service",
975
                EgonPetaHeat.scenario == scenario,
976
            )
977
        )
978
979
        df_peta_demand = pd.read_sql(
980
            query.statement, query.session.bind, index_col=None
981
        )
982
983
    return df_peta_demand
984
985
986
def calc_cts_building_profiles(
987
    bus_ids,
988
    scenario,
989
    sector,
990
):
991
    """
992
    Calculate the cts demand profile for each building. The profile is
993
    calculated by the demand share of the building per substation bus.
994
995
    Parameters
996
    ----------
997
    bus_ids: list of int
        Ids of the substations for which selected building profiles are
        calculated.
1000
    scenario: str
1001
        Scenario for which the share is calculated: "eGon2035" or "eGon100RE"
1002
    sector: str
1003
        Sector for which the share is calculated: "electricity" or "heat"
1004
1005
    Returns
1006
    -------
1007
    df_building_profiles: pd.DataFrame
1008
        Table of demand profile per building. Column names are building IDs
1009
        and index is hour of the year as int (0-8759).
1010
1011
    """
1012
    if sector == "electricity":
1013
        # Get cts building electricity demand share of selected buildings
1014
        with db.session_scope() as session:
1015
            cells_query = (
1016
                session.query(
1017
                    EgonCtsElectricityDemandBuildingShare,
1018
                )
1019
                .filter(
1020
                    EgonCtsElectricityDemandBuildingShare.scenario == scenario
1021
                )
1022
                .filter(
1023
                    EgonCtsElectricityDemandBuildingShare.bus_id.in_(bus_ids)
1024
                )
1025
            )
1026
1027
        df_demand_share = pd.read_sql(
1028
            cells_query.statement, cells_query.session.bind, index_col=None
1029
        )
1030
1031
        # Get substation cts electricity load profiles of selected bus_ids
1032
        with db.session_scope() as session:
1033
            cells_query = (
1034
                session.query(EgonEtragoElectricityCts).filter(
1035
                    EgonEtragoElectricityCts.scn_name == scenario
1036
                )
1037
            ).filter(EgonEtragoElectricityCts.bus_id.in_(bus_ids))
1038
1039
        df_cts_substation_profiles = pd.read_sql(
1040
            cells_query.statement,
1041
            cells_query.session.bind,
1042
        )
1043
        df_cts_substation_profiles = pd.DataFrame.from_dict(
1044
            df_cts_substation_profiles.set_index("bus_id")["p_set"].to_dict(),
1045
            orient="index",
1046
        )
1047
        # df_cts_profiles = calc_load_curves_cts(scenario)
1048
1049
    elif sector == "heat":
1050
        # Get cts building heat demand share of selected buildings
1051
        with db.session_scope() as session:
1052
            cells_query = (
1053
                session.query(
1054
                    EgonCtsHeatDemandBuildingShare,
1055
                )
1056
                .filter(EgonCtsHeatDemandBuildingShare.scenario == scenario)
1057
                .filter(EgonCtsHeatDemandBuildingShare.bus_id.in_(bus_ids))
1058
            )
1059
1060
        df_demand_share = pd.read_sql(
1061
            cells_query.statement, cells_query.session.bind, index_col=None
1062
        )
1063
1064
        # Get substation cts heat load profiles of selected bus_ids
1065
        # (this profile only contains zensus cells with individual heating;
1066
        #  in order to obtain a profile for the whole MV grid it is afterwards
1067
        #  scaled by the grids total CTS demand from peta)
1068
        with db.session_scope() as session:
1069
            cells_query = (
1070
                session.query(EgonEtragoHeatCts).filter(
1071
                    EgonEtragoHeatCts.scn_name == scenario
1072
                )
1073
            ).filter(EgonEtragoHeatCts.bus_id.in_(bus_ids))
1074
1075
        df_cts_substation_profiles = pd.read_sql(
1076
            cells_query.statement,
1077
            cells_query.session.bind,
1078
        )
1079
        df_cts_substation_profiles = pd.DataFrame.from_dict(
1080
            df_cts_substation_profiles.set_index("bus_id")["p_set"].to_dict(),
1081
            orient="index",
1082
        )
1083
        for bus_id in bus_ids:
1084
            if bus_id in df_cts_substation_profiles.index:
1085
                # get peta demand to scale load profile to
1086
                peta_cts_demand = get_peta_demand(bus_id, scenario)
1087
                scaling_factor = (
1088
                    peta_cts_demand.demand.sum()
1089
                    / df_cts_substation_profiles.loc[bus_id, :].sum()
1090
                )
1091
                # scale load profile
1092
                df_cts_substation_profiles.loc[bus_id, :] *= scaling_factor
1093
1094
    else:
1095
        raise KeyError("Sector needs to be either 'electricity' or 'heat'")
1096
1097
    # TODO remove after #722
1098
    df_demand_share.rename(columns={"id": "building_id"}, inplace=True)
1099
1100
    # get demand profile for all buildings for selected demand share
1101
    df_building_profiles = pd.DataFrame()
1102
    for bus_id, df in df_demand_share.groupby("bus_id"):
1103
        shares = df.set_index("building_id", drop=True)["profile_share"]
1104
        try:
1105
            profile_ts = df_cts_substation_profiles.loc[bus_id]
1106
        except KeyError:
1107
            # This should only happen within the SH cutout
1108
            log.info(
1109
                f"No CTS profile found for substation with bus_id:"
1110
                f" {bus_id}"
1111
            )
1112
            continue
1113
1114
        building_profiles = np.outer(profile_ts, shares)
1115
        building_profiles = pd.DataFrame(
1116
            building_profiles, index=profile_ts.index, columns=shares.index
1117
        )
1118
        df_building_profiles = pd.concat(
1119
            [df_building_profiles, building_profiles], axis=1
1120
        )
1121
1122
    return df_building_profiles
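
# Illustrative sketch only (made-up numbers): the substation heat profile only
# covers census cells with individual heating, so it is rescaled such that its
# annual sum matches the MV grid district's total CTS demand from peta, as
# done in the heat branch above.
def _example_rescale_heat_profile():
    profile = pd.Series([1.0, 2.0, 3.0, 4.0])  # annual sum is 10
    peta_annual_demand = 25.0  # assumed target annual demand
    scaling_factor = peta_annual_demand / profile.sum()
    return profile * scaling_factor  # now sums to 25
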
def delete_synthetic_cts_buildings():
1126
    """
1127
    All synthetic cts buildings are deleted from the DB. This is necessary if
1128
    the task is run multiple times as the existing synthetic buildings
1129
    influence the results.
1130
    """
1131
    # import db tables
1132
    from saio.openstreetmap import osm_buildings_synthetic
1133
1134
    # delete all synthetic cts buildings
1135
    with db.session_scope() as session:
1136
        session.query(osm_buildings_synthetic).filter(
1137
            osm_buildings_synthetic.building == "cts"
1138
        ).delete()
1139
1140
1141
def remove_double_bus_id(df_cts_buildings):
    """This is a backup ad hoc fix in case a building is still assigned to
    two substations. In this case one of the entries is simply dropped. As
    this currently affects only one building with one amenity, the deviation
    is negligible."""
1146
    # assign bus_id via census cell of amenity
1147
    with db.session_scope() as session:
1148
        cells_query = session.query(
1149
            MapZensusGridDistricts.zensus_population_id,
1150
            MapZensusGridDistricts.bus_id,
1151
        )
1152
1153
    df_egon_map_zensus_buildings_buses = pd.read_sql(
1154
        cells_query.statement,
1155
        cells_query.session.bind,
1156
        index_col=None,
1157
    )
1158
    df_cts_buildings = pd.merge(
1159
        left=df_cts_buildings,
1160
        right=df_egon_map_zensus_buildings_buses,
1161
        on="zensus_population_id",
1162
    )
1163
1164
    substation_per_building = df_cts_buildings.groupby("id")[
1165
        "bus_id"
1166
    ].nunique()
1167
    building_id = substation_per_building.loc[
1168
        substation_per_building > 1
1169
    ].index
1170
    df_duplicates = df_cts_buildings.loc[
1171
        df_cts_buildings["id"].isin(building_id)
1172
    ]
1173
    for unique_id in df_duplicates["id"].unique():
1174
        drop_index = df_duplicates[df_duplicates["id"] == unique_id].index[0]
1175
        print(
1176
            f"Buildings {df_cts_buildings.loc[drop_index, 'id']}"
1177
            f" dropped because of double substation"
1178
        )
1179
        df_cts_buildings.drop(index=drop_index, inplace=True)
1180
1181
    df_cts_buildings.drop(columns="bus_id", inplace=True)
1182
1183
    return df_cts_buildings
1184
1185
1186
def cts_buildings():
    """
    Assigns CTS demand to buildings and calculates the respective demand
    profiles. The demand profile per substation is disaggregated by the
    annual demand share of each census cell and by the number of amenities
    per building within the cell. If no building data is available,
    synthetic buildings are generated around the amenities. If no amenities
    but CTS demand is available, buildings are randomly selected. If neither
    building nor amenity is available, random synthetic buildings are
    generated. The demand share is stored in the database.

    Note
    -----
    Cells with CTS demand, amenities and buildings do not change within
    the scenarios, only the demand itself. Therefore any scenario
    can be used universally to determine the CTS buildings but not for
    the demand share.
    """
1204
1205
    # ========== Register np datatypes with SQLA ==========
1206
    def adapt_numpy_float64(numpy_float64):
1207
        return AsIs(numpy_float64)
1208
1209
    def adapt_numpy_int64(numpy_int64):
1210
        return AsIs(numpy_int64)
1211
1212
    register_adapter(np.float64, adapt_numpy_float64)
1213
    register_adapter(np.int64, adapt_numpy_int64)
1214
    # =====================================================
1215
1216
    log.info("Start logging!")
1217
    # Buildings with amenities
1218
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()  # TODO: status2023 this is fixed to 2023
1219
    log.info("Buildings with amenities selected!")
1220
1221
    # Median number of amenities per cell
1222
    median_n_amenities = int(
1223
        df_buildings_with_amenities.groupby("zensus_population_id")[
1224
            "n_amenities_inside"
1225
        ]
1226
        .sum()
1227
        .median()
1228
    )
1229
    log.info(f"Median amenity value: {median_n_amenities}")
1230
1231
    # Remove synthetic CTS buildings if existing
1232
    delete_synthetic_cts_buildings()
1233
    log.info("Old synthetic cts buildings deleted!")
1234
1235
    # Amenities not assigned to buildings
1236
    df_amenities_without_buildings = amenities_without_buildings()
1237
    log.info("Amenities without buildings selected!")
1238
1239
    # Append lost cells due to duplicated ids, to cover all demand cells
1240
    if not df_lost_cells.empty:
1241
        # Number of synth amenities per cell
1242
        df_lost_cells["amenities"] = median_n_amenities
1243
        # create row for every amenity
1244
        df_lost_cells["amenities"] = (
1245
            df_lost_cells["amenities"].astype(int).apply(range)
1246
        )
1247
        df_lost_cells = df_lost_cells.explode("amenities")
1248
        df_lost_cells.drop(columns="amenities", inplace=True)
1249
        df_amenities_without_buildings = pd.concat(
1250
            [df_amenities_without_buildings, df_lost_cells], ignore_index=True
1251
        )
1252
        log.info(
1253
            f"{df_lost_cells.shape[0]} lost cells due to substation "
1254
            f"intersection appended!"
1255
        )
1256
1257
    # One building per amenity
1258
    df_amenities_without_buildings["n_amenities_inside"] = 1
1259
    # Create synthetic buildings for amenites without buildings
1260
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
1261
        df_amenities_without_buildings, points="geom_amenity"
1262
    )
1263
    log.info("Synthetic buildings created!")
1264
1265
    # TODO remove renaming after #722
1266
    write_table_to_postgis(
1267
        df_synthetic_buildings_with_amenities.rename(
1268
            columns={
1269
                "zensus_population_id": "cell_id",
1270
                "egon_building_id": "id",
1271
            }
1272
        ),
1273
        OsmBuildingsSynthetic,
1274
        engine=engine,
1275
        drop=False,
1276
    )
1277
    log.info("Synthetic buildings exported to DB!")
1278
1279
    # Cells without amenities but CTS demand and buildings
1280
    df_buildings_without_amenities = buildings_without_amenities()
1281
    log.info("Buildings without amenities in demand cells identified!")
1282
1283
    # Backup bugfix for duplicated buildings which occur in the SQL query
1284
    # drop building ids which have already been used
1285
    mask = df_buildings_without_amenities.loc[
1286
        df_buildings_without_amenities["id"].isin(
1287
            df_buildings_with_amenities["id"]
1288
        )
1289
    ].index
1290
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
1291
        index=mask
1292
    ).reset_index(drop=True)
1293
    log.info(f"{len(mask)} duplicated ids removed!")
1294
1295
    # select median n buildings per cell
1296
    df_buildings_without_amenities = select_cts_buildings(
1297
        df_buildings_without_amenities, max_n=median_n_amenities
1298
    )
1299
    df_buildings_without_amenities["n_amenities_inside"] = 1
1300
    log.info(f"{median_n_amenities} buildings per cell selected!")
1301
1302
    # Create synthetic amenities and buildings in cells with only CTS demand
1303
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
1304
        df_buildings_without_amenities
1305
    )
1306
    log.info("Cells with only demand identified!")
1307
1308
    # TODO implement overlay prevention #953 here
1309
    # Median n Amenities per cell
1310
    df_cells_with_cts_demand_only["amenities"] = median_n_amenities
1311
    # create row for every amenity
1312
    df_cells_with_cts_demand_only["amenities"] = (
1313
        df_cells_with_cts_demand_only["amenities"].astype(int).apply(range)
1314
    )
1315
    df_cells_with_cts_demand_only = df_cells_with_cts_demand_only.explode(
1316
        "amenities"
1317
    )
1318
    df_cells_with_cts_demand_only.drop(columns="amenities", inplace=True)
1319
1320
    # Only 1 Amenity per Building
1321
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
1322
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
1323
        df_cells_with_cts_demand_only, amenities=1
1324
    )
1325
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
1326
        df_cells_with_cts_demand_only, points="geom_point"
1327
    )
1328
    log.info(f"{median_n_amenities} synthetic buildings per cell created")
1329
1330
    # TODO remove renaming after #722
1331
    write_table_to_postgis(
1332
        df_synthetic_buildings_without_amenities.rename(
1333
            columns={
1334
                "zensus_population_id": "cell_id",
1335
                "egon_building_id": "id",
1336
            }
1337
        ),
1338
        OsmBuildingsSynthetic,
1339
        engine=engine,
1340
        drop=False,
1341
    )
1342
    log.info("Synthetic buildings exported to DB")
1343
1344
    # Concat all buildings
1345
    columns = [
1346
        "zensus_population_id",
1347
        "id",
1348
        "geom_building",
1349
        "n_amenities_inside",
1350
        "source",
1351
    ]
1352
1353
    df_buildings_with_amenities["source"] = "bwa"
1354
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
1355
    df_buildings_without_amenities["source"] = "bwoa"
1356
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
1357
1358
    df_cts_buildings = pd.concat(
1359
        [
1360
            df_buildings_with_amenities[columns],
1361
            df_synthetic_buildings_with_amenities[columns],
1362
            df_buildings_without_amenities[columns],
1363
            df_synthetic_buildings_without_amenities[columns],
1364
        ],
1365
        axis=0,
1366
        ignore_index=True,
1367
    )
1368
    df_cts_buildings = remove_double_bus_id(df_cts_buildings)
1369
    log.info("Double bus_id checked")
1370
1371
    # TODO remove dypte correction after #722
1372
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
1373
1374
    df_cts_buildings = gpd.GeoDataFrame(
1375
        df_cts_buildings, geometry="geom_building", crs=3035
1376
    )
1377
    df_cts_buildings = df_cts_buildings.reset_index().rename(
1378
        columns={"index": "serial"}
1379
    )
1380
1381
    # Write table to db for debugging and postprocessing
1382
    write_table_to_postgis(
1383
        df_cts_buildings,
1384
        CtsBuildings,
1385
        engine=engine,
1386
        drop=True,
1387
    )
1388
    log.info("CTS buildings exported to DB!")
1389
1390
def cts_electricity():
    """
    Calculate the CTS electricity demand share of the HV/MV substation
    profile for each selected building.
    """
    log.info("Start logging!")
    with db.session_scope() as session:
        cells_query = session.query(CtsBuildings)

    df_cts_buildings = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col=None
    )
    log.info("CTS buildings from DB imported!")

    df_demand_share = pd.DataFrame()

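    # For each scenario, compute every building's share of its substation
    # profile and collect the results in a single frame.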
    for scenario in config.settings()["egon-data"]["--scenarios"]:
        df_demand_share_per_scenario = calc_building_demand_profile_share(
            df_cts_buildings, scenario=scenario, sector="electricity"
        )
        log.info(f"Profile share for {scenario} calculated!")

        df_demand_share = pd.concat(
            [df_demand_share, df_demand_share_per_scenario],
            axis=0,
            ignore_index=True,
        )

    df_demand_share.rename(columns={"id": "building_id"}, inplace=True)

    write_table_to_postgres(
        df_demand_share,
        EgonCtsElectricityDemandBuildingShare,
        drop=True,
    )
    log.info("Profile share exported to DB!")


def cts_heat():
    """
    Calculate the CTS heat demand share of the HV/MV substation profile
    for each selected building.
    """
    log.info("Start logging!")
    with db.session_scope() as session:
        cells_query = session.query(CtsBuildings)

    df_cts_buildings = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col=None
    )
    log.info("CTS buildings from DB imported!")

    df_demand_share = pd.DataFrame()

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        df_demand_share_per_scenario = calc_building_demand_profile_share(
            df_cts_buildings, scenario=scenario, sector="heat"
        )
        log.info(f"Profile share for {scenario} calculated!")

        df_demand_share = pd.concat(
            [df_demand_share, df_demand_share_per_scenario],
            axis=0,
            ignore_index=True,
        )

    df_demand_share.rename(columns={"id": "building_id"}, inplace=True)

    write_table_to_postgres(
        df_demand_share,
        EgonCtsHeatDemandBuildingShare,
        drop=True,
    )
    log.info("Profile share exported to DB!")


def get_cts_electricity_peak_load():
    """
    Get electricity peak load of all CTS buildings for both scenarios and
    store in DB.
    """
    log.info("Start logging!")

    BuildingElectricityPeakLoads.__table__.create(bind=engine, checkfirst=True)

    # Delete rows with cts demand
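    # Existing CTS rows are removed first; results below are appended, so
    # this keeps re-runs from duplicating entries.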
    with db.session_scope() as session:
        session.query(BuildingElectricityPeakLoads).filter(
            BuildingElectricityPeakLoads.sector == "cts"
        ).delete()
    log.info("Cts electricity peak load removed from DB!")

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        with db.session_scope() as session:
            cells_query = session.query(
                EgonCtsElectricityDemandBuildingShare
            ).filter(
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
            )

        df_demand_share = pd.read_sql(
            cells_query.statement, cells_query.session.bind, index_col=None
        )

        with db.session_scope() as session:
            cells_query = session.query(EgonEtragoElectricityCts).filter(
                EgonEtragoElectricityCts.scn_name == scenario
            )

        df_cts_profiles = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )
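        # Reshape the queried profiles into a wide frame: one column per
        # bus_id, one row per timestep of the p_set time series.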
        df_cts_profiles = pd.DataFrame.from_dict(
            df_cts_profiles.set_index("bus_id")["p_set"].to_dict(),
            orient="columns",
        )

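        # Building peak load = peak of the substation profile scaled by the
        # building's profile share (joined on bus_id).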
        df_peak_load = pd.merge(
            left=df_cts_profiles.max().astype(float).rename("max"),
            right=df_demand_share,
            left_index=True,
            right_on="bus_id",
        )

        # Convert unit from MWh to W
        df_peak_load["max"] = df_peak_load["max"] * 1e6
        df_peak_load["peak_load_in_w"] = (
            df_peak_load["max"] * df_peak_load["profile_share"]
        )
        log.info(f"Peak load for {scenario} determined!")

        # TODO remove after #772
        df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
        df_peak_load["sector"] = "cts"

        # Write peak loads into db
        write_table_to_postgres(
            df_peak_load,
            BuildingElectricityPeakLoads,
            drop=False,
            index=False,
            if_exists="append",
        )

        log.info(f"Peak load for {scenario} exported to DB!")


def get_cts_heat_peak_load():
    """
    Get heat peak load of all CTS buildings for both scenarios and store in DB.
    """
    log.info("Start logging!")

    BuildingHeatPeakLoads.__table__.create(bind=engine, checkfirst=True)

    # Delete rows with cts demand
    with db.session_scope() as session:
        session.query(BuildingHeatPeakLoads).filter(
            BuildingHeatPeakLoads.sector == "cts"
        ).delete()
    log.info("Cts heat peak load removed from DB!")

    for scenario in config.settings()["egon-data"]["--scenarios"]:
        with db.session_scope() as session:
            cells_query = session.query(
                EgonCtsHeatDemandBuildingShare
            ).filter(
                EgonCtsHeatDemandBuildingShare.scenario == scenario
            )

        df_demand_share = pd.read_sql(
            cells_query.statement, cells_query.session.bind, index_col=None
        )
        log.info(f"Retrieved demand share for scenario: {scenario}")

        with db.session_scope() as session:
            cells_query = session.query(EgonEtragoHeatCts).filter(
                EgonEtragoHeatCts.scn_name == scenario
            )

        df_cts_profiles = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )
        log.info(f"Retrieved substation profiles for scenario: {scenario}")

        df_cts_profiles = pd.DataFrame.from_dict(
            df_cts_profiles.set_index("bus_id")["p_set"].to_dict(),
            orient="columns",
        )

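        # Same approach as for electricity: the peak of the substation heat
        # profile times the building's profile share gives the building's
        # heat peak load.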
        df_peak_load = pd.merge(
            left=df_cts_profiles.max().astype(float).rename("max"),
            right=df_demand_share,
            left_index=True,
            right_on="bus_id",
        )

        # Convert unit from MWh to W
        df_peak_load["max"] = df_peak_load["max"] * 1e6
        df_peak_load["peak_load_in_w"] = (
            df_peak_load["max"] * df_peak_load["profile_share"]
        )
        log.info(f"Peak load for {scenario} determined!")

        # TODO remove after #772
        df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
        df_peak_load["sector"] = "cts"

        # Write peak loads into db
        write_table_to_postgres(
            df_peak_load,
            BuildingHeatPeakLoads,
            drop=False,
            index=False,
            if_exists="append",
        )

        log.info(f"Peak load for {scenario} exported to DB!")


def assign_voltage_level_to_buildings():
    """
    Add voltage level to all buildings by summed peak demand.

    All entries with the same building id get the voltage level corresponding
    to their summed residential and CTS peak demand.
    """

    with db.session_scope() as session:
        cells_query = session.query(BuildingElectricityPeakLoads)

        df_peak_loads = pd.read_sql(
            cells_query.statement,
            cells_query.session.bind,
        )

    df_peak_load_buildings = df_peak_loads.groupby(
        ["building_id", "scenario"]
    )["peak_load_in_w"].sum()
    df_peak_load_buildings = df_peak_load_buildings.to_frame()
    df_peak_load_buildings.loc[:, "voltage_level"] = 0

    # Identify voltage_level by thresholds defined in the eGon project
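    # Thresholds on the summed peak load per building and scenario:
    #   <= 0.1 MW -> 7, > 0.1 MW -> 6, > 0.2 MW -> 5,
    #   > 5.5 MW -> 4, > 20 MW -> 3, > 120 MW -> 1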
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] <= 0.1 * 1e6, "voltage_level"
    ] = 7
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 0.1 * 1e6, "voltage_level"
    ] = 6
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 0.2 * 1e6, "voltage_level"
    ] = 5
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 5.5 * 1e6, "voltage_level"
    ] = 4
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 20 * 1e6, "voltage_level"
    ] = 3
    df_peak_load_buildings.loc[
        df_peak_load_buildings["peak_load_in_w"] > 120 * 1e6, "voltage_level"
    ] = 1

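    # Join the per-building voltage level back onto every peak-load row
    # (residential and CTS) of the same building and scenario.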
    df_peak_load = pd.merge(
        left=df_peak_loads.drop(columns="voltage_level"),
        right=df_peak_load_buildings["voltage_level"],
        how="left",
        left_on=["building_id", "scenario"],
        right_index=True,
    )

    # Write peak loads into db
    # Drop the existing table and replace it with the new data
    write_table_to_postgres(
        df_peak_load,
        BuildingElectricityPeakLoads,
        drop=True,
        index=False,
        if_exists="append",
    )