Passed
Pull Request — dev (#826)
by
unknown
02:04
created

calc_building_profiles()   C

Complexity

Conditions 9

Size

Total Lines 72
Code Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 34
dl 0
loc 72
rs 6.6666
c 0
b 0
f 0
cc 9
nop 4

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    BuildingPeakLoads,
17
    OsmBuildingsSynthetic,
18
)
19
from egon.data.datasets.electricity_demand_timeseries.tools import (
20
    random_ints_until_sum,
21
    random_point_in_square,
22
    specific_int_until_sum,
23
    write_table_to_postgis,
24
    write_table_to_postgres,
25
)
26
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
27
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
28
import egon.data.config
29
30
engine = db.engine()
31
Base = declarative_base()
32
33
# import db tables
34
saio.register_schema("openstreetmap", engine=engine)
35
saio.register_schema("society", engine=engine)
36
saio.register_schema("demand", engine=engine)
37
saio.register_schema("boundaries", engine=engine)
38
39
40
class EgonCtsElectricityDemandBuildingShare(Base):
41
    __tablename__ = "egon_cts_electricity_demand_building_share"
42
    __table_args__ = {"schema": "demand"}
43
44
    id = Column(Integer, primary_key=True)
45
    scenario = Column(String, primary_key=True)
46
    bus_id = Column(Integer, index=True)
47
    profile_share = Column(Float)
48
49
50
class CtsBuildings(Base):
51
    __tablename__ = "egon_cts_buildings"
52
    __table_args__ = {"schema": "openstreetmap"}
53
54
    serial = Column(Integer, primary_key=True)
55
    id = Column(Integer)
56
    zensus_population_id = Column(Integer, index=True)
57
    geom_building = Column(Geometry("Polygon", 3035))
58
    n_amenities_inside = Column(Integer)
59
    source = Column(String)
60
61
62
def amenities_without_buildings():
63
    """
64
    Amenities which have no buildings assigned and are in
65
    a cell with cts demand are determined.
66
67
    Returns
68
    -------
69
    pd.DataFrame
70
        Table of amenities without buildings
71
    """
72
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
73
74
    with db.session_scope() as session:
75
        cells_query = (
76
            session.query(
77
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
78
                # TODO can be used for square around amenity
79
                #  (1 geom_amenity: 1 geom_building)
80
                #  not unique amenity_ids yet
81
                osm_amenities_not_in_buildings_filtered.geom_amenity,
82
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
83
                # EgonDemandRegioZensusElectricity.demand,
84
                # # TODO can be used to generate n random buildings
85
                # # (n amenities : 1 randombuilding)
86
                # func.count(
87
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
88
                # ).label("n_amenities_inside"),
89
                # DestatisZensusPopulationPerHa.geom,
90
            )
91
            .filter(
92
                func.st_within(
93
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
94
                    DestatisZensusPopulationPerHa.geom,
95
                )
96
            )
97
            .filter(
98
                DestatisZensusPopulationPerHa.id
99
                == EgonDemandRegioZensusElectricity.zensus_population_id
100
            )
101
            .filter(
102
                EgonDemandRegioZensusElectricity.sector == "service",
103
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
104
                #         ).group_by(
105
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
106
                #             DestatisZensusPopulationPerHa.geom,
107
            )
108
        )
109
    # # TODO can be used to generate n random buildings
110
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
111
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
112
    # )
113
    #
114
115
    # # TODO can be used for square around amenity
116
    df_amenities_without_buildings = gpd.read_postgis(
117
        cells_query.statement,
118
        cells_query.session.bind,
119
        geom_col="geom_amenity",
120
    )
121
    return df_amenities_without_buildings
122
123
124
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
125
    """
126
    Building centroids are placed randomly within census cells.
127
    The Number of buildings is derived from n_amenity_inside, the selected
128
    method and number of amenities per building.
129
130
    Returns
131
    -------
132
    df: gpd.GeoDataFrame
133
        Table of buildings centroids
134
    """
135
    if isinstance(max_amenities, int):
136
        # amount of amenities is randomly generated within bounds
137
        # (max_amenities, amenities per cell)
138
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
139
            random_ints_until_sum, args=[max_amenities]
140
        )
141
    if isinstance(amenities, int):
142
        # Specific amount of amenities per building
143
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
144
            specific_int_until_sum, args=[amenities]
145
        )
146
147
    # Unnest each building
148
    df = df.explode(column="n_amenities_inside")
149
150
    # building count per cell
151
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
152
153
    # generate random synthetic buildings
154
    edge_length = 5
155
    # create random points within census cells
156
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
157
158
    df.reset_index(drop=True, inplace=True)
159
    # Store center of polygon
160
    df["geom_point"] = points
161
    # Drop geometry of census cell
162
    df = df.drop(columns=["geom"])
163
164
    return df
165
166
167
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
168
    """
169
    Synthetic buildings are generated around points.
170
171
    Parameters
172
    ----------
173
    df: pd.DataFrame
174
        Table of census cells
175
    points: gpd.GeoSeries or str
176
        List of points to place buildings around or column name of df
177
    crs: str
178
        CRS of result table
179
180
    Returns
181
    -------
182
    df: gpd.GeoDataFrame
183
        Synthetic buildings
184
    """
185
186
    if isinstance(points, str) and points in df.columns:
187
        points = df[points]
188
    elif isinstance(points, gpd.GeoSeries):
189
        pass
190
    else:
191
        raise ValueError("Points are of the wrong type")
192
193
    # Create building using a square around point
194
    edge_length = 5
195
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
196
197
    if "geom_point" not in df.columns:
198
        df["geom_point"] = df["geom_building"].centroid
199
200
    # TODO Check CRS
201
    df = gpd.GeoDataFrame(
202
        df,
203
        crs=crs,
204
        geometry="geom_building",
205
    )
206
207
    # TODO remove after implementation of egon_building_id
208
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
209
210
    # get max number of building ids from synthetic residential table
211
    with db.session_scope() as session:
212
        max_synth_residential_id = session.execute(
213
            func.max(OsmBuildingsSynthetic.id)
214
        ).scalar()
215
    max_synth_residential_id = int(max_synth_residential_id)
216
217
    # create sequential ids
218
    df["egon_building_id"] = range(
219
        max_synth_residential_id + 1,
220
        max_synth_residential_id + df.shape[0] + 1,
221
    )
222
223
    df["area"] = df["geom_building"].area
224
    # set building type of synthetic building
225
    df["building"] = "cts"
226
    # TODO remove in #772
227
    df = df.rename(
228
        columns={
229
            # "zensus_population_id": "cell_id",
230
            "egon_building_id": "id",
231
        }
232
    )
233
    return df
234
235
236
def buildings_with_amenities():
237
    """
238
    Amenities which are assigned to buildings are determined
239
    and grouped per building and zensus cell. Buildings
240
    covering multiple cells therefore exists multiple times
241
    but in different zensus cells. This is necessary to cover
242
    all cells with a cts demand. If buildings exist in multiple
243
    substations, their amenities are summed and assigned and kept in
244
    one substation only. If as a result, a census cell is uncovered,
245
    a synthetic amenity is placed. The buildings are aggregated
246
    afterwards during the calculation of the profile_share.
247
248
    Returns
249
    -------
250
    df_buildings_with_amenities: gpd.GeoDataFrame
251
        Contains all buildings with amenities per zensus cell.
252
    df_lost_cells: gpd.GeoDataFrame
253
        Contains synthetic amenities in lost cells. Might be empty
254
    """
255
256
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
257
258
    with db.session_scope() as session:
259
        cells_query = (
260
            session.query(
261
                osm_amenities_in_buildings_filtered,
262
                MapZensusGridDistricts.bus_id,
263
            )
264
            .filter(
265
                MapZensusGridDistricts.zensus_population_id
266
                == osm_amenities_in_buildings_filtered.zensus_population_id
267
            )
268
            .filter(
269
                EgonDemandRegioZensusElectricity.zensus_population_id
270
                == osm_amenities_in_buildings_filtered.zensus_population_id
271
            )
272
            .filter(
273
                EgonDemandRegioZensusElectricity.sector == "service",
274
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
275
            )
276
        )
277
    df_amenities_in_buildings = pd.read_sql(
278
        cells_query.statement, cells_query.session.bind, index_col=None
279
    )
280
281
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
282
        "geom_building"
283
    ].apply(to_shape)
284
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
285
        "geom_amenity"
286
    ].apply(to_shape)
287
288
    df_amenities_in_buildings["n_amenities_inside"] = 1
289
290
    # add identifier column for buildings in multiple substations
291
    df_amenities_in_buildings[
292
        "duplicate_identifier"
293
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
294
        "n_amenities_inside"
295
    ].transform(
296
        "cumsum"
297
    )
298
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
299
        ["id", "duplicate_identifier"]
300
    )
301
    # sum amenities of buildings with multiple substations
302
    df_amenities_in_buildings[
303
        "n_amenities_inside"
304
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
305
        "n_amenities_inside"
306
    ].transform(
307
        "sum"
308
    )
309
    # identify lost zensus cells
310
    df_lost_cells = df_amenities_in_buildings.loc[
311
        df_amenities_in_buildings.duplicated(
312
            subset=["id", "duplicate_identifier"], keep="first"
313
        )
314
    ]["zensus_population_id"]
315
316
    # drop duplicated buildings
317
    df_amenities_in_buildings.drop_duplicates(
318
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
319
    )
320
321
    # check if lost zensus cells are already covered
322
    if not df_lost_cells.empty:
323
        if not (
324
            df_amenities_in_buildings["zensus_population_id"]
325
            .isin(df_lost_cells)
326
            .empty
327
        ):
328
            # query geom data for cell if not
329
            with db.session_scope() as session:
330
                cells_query = session.query(
331
                    DestatisZensusPopulationPerHa.id,
332
                    DestatisZensusPopulationPerHa.geom,
333
                ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
334
335
            df_lost_cells = gpd.read_postgis(
336
                cells_query.statement,
337
                cells_query.session.bind,
338
                geom_col="geom",
339
            )
340
            # TODO maybe adapt method
341
            # place random amenity in cell
342
            df_lost_cells["n_amenities_inside"] = 1
343
            df_lost_cells.rename(
344
                columns={
345
                    "id": "zensus_population_id",
346
                },
347
                inplace=True,
348
            )
349
            df_lost_cells = place_buildings_with_amenities(
350
                df_lost_cells, amenities=1
351
            )
352
            df_lost_cells.rename(
353
                columns={
354
                    # "id": "zensus_population_id",
355
                    "geom_point": "geom_amenity",
356
                },
357
                inplace=True,
358
            )
359
            df_lost_cells.drop(
360
                columns=["building_count", "n_amenities_inside"], inplace=True
361
            )
362
        else:
363
            df_lost_cells = None
364
    else:
365
        df_lost_cells = None
366
367
    # drop helper columns
368
    df_amenities_in_buildings.drop(
369
        columns=["duplicate_identifier"], inplace=True
370
    )
371
372
    # sum amenities per building and cell
373
    df_amenities_in_buildings[
374
        "n_amenities_inside"
375
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
376
        "n_amenities_inside"
377
    ].transform(
378
        "sum"
379
    )
380
    # drop duplicated buildings
381
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
382
        ["id", "zensus_population_id"]
383
    )
384
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
385
386
    df_buildings_with_amenities = df_buildings_with_amenities[
387
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
388
    ]
389
    df_buildings_with_amenities.rename(
390
        columns={
391
            # "zensus_population_id": "cell_id",
392
            "egon_building_id": "id"
393
        },
394
        inplace=True,
395
    )
396
397
    return df_buildings_with_amenities, df_lost_cells
398
399
400
def buildings_without_amenities():
401
    """
402
    Buildings (filtered and synthetic) in cells with
403
    cts demand but no amenities are determined.
404
405
    Returns
406
    -------
407
    df_buildings_without_amenities: gpd.GeoDataFrame
408
        Table of buildings without amenities in zensus cells
409
        with cts demand.
410
    """
411
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
412
    from saio.openstreetmap import (
413
        osm_amenities_shops_filtered,
414
        osm_buildings_filtered,
415
        osm_buildings_synthetic,
416
    )
417
418
    # buildings_filtered in cts-demand-cells without amenities
419
    with db.session_scope() as session:
420
421
        # Synthetic Buildings
422
        q_synth_buildings = session.query(
423
            osm_buildings_synthetic.cell_id.cast(Integer).label(
424
                "zensus_population_id"
425
            ),
426
            osm_buildings_synthetic.id.cast(Integer).label("id"),
427
            osm_buildings_synthetic.area.label("area"),
428
            osm_buildings_synthetic.geom_building.label("geom_building"),
429
            osm_buildings_synthetic.geom_point.label("geom_point"),
430
        )
431
432
        # Buildings filtered
433
        q_buildings_filtered = session.query(
434
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
435
            osm_buildings_filtered.id,
436
            osm_buildings_filtered.area,
437
            osm_buildings_filtered.geom_building,
438
            osm_buildings_filtered.geom_point,
439
        ).filter(
440
            osm_buildings_filtered.id
441
            == egon_map_zensus_buildings_filtered_all.id
442
        )
443
444
        # Amenities + zensus_population_id
445
        q_amenities = (
446
            session.query(
447
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
448
            )
449
            .filter(
450
                func.st_within(
451
                    osm_amenities_shops_filtered.geom_amenity,
452
                    DestatisZensusPopulationPerHa.geom,
453
                )
454
            )
455
            .distinct(DestatisZensusPopulationPerHa.id)
456
        )
457
458
        # Cells with CTS demand but without amenities
459
        q_cts_without_amenities = (
460
            session.query(
461
                EgonDemandRegioZensusElectricity.zensus_population_id,
462
            )
463
            .filter(
464
                EgonDemandRegioZensusElectricity.sector == "service",
465
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
466
            )
467
            .filter(
468
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
469
                    q_amenities
470
                )
471
            )
472
            .distinct()
473
        )
474
475
        # Buildings filtered + synthetic buildings residential in
476
        # cells with CTS demand but without amenities
477
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
478
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
479
                q_cts_without_amenities
480
            )
481
        )
482
483
    # df_buildings_without_amenities = pd.read_sql(
484
    #     cells_query.statement, cells_query.session.bind, index_col=None)
485
    df_buildings_without_amenities = gpd.read_postgis(
486
        cells_query.statement,
487
        cells_query.session.bind,
488
        geom_col="geom_building",
489
    )
490
491
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
492
        columns={
493
            # "zensus_population_id": "cell_id",
494
            "egon_building_id": "id",
495
        }
496
    )
497
498
    return df_buildings_without_amenities
499
500
501
def select_cts_buildings(df_buildings_wo_amenities):
502
    """
503
    Buildings (filtered and synthetic) in cells with
504
    cts demand are selected. Only the first building
505
    is taken for each cell and 1 amenities is assigned.
506
507
    Returns
508
    -------
509
    df_buildings_with_cts_demand: gpd.GeoDataFrame
510
        Table of buildings
511
    """
512
    # TODO Adapt method
513
    # Select one building each cell
514
    # take the first
515
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
516
        # subset="cell_id", keep="first"
517
        subset="zensus_population_id",
518
        keep="first",
519
    ).reset_index(drop=True)
520
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
521
    df_buildings_with_cts_demand["building"] = "cts"
522
523
    return df_buildings_with_cts_demand
524
525
526
def cells_with_cts_demand_only(df_buildings_without_amenities):
527
    """
528
    Cells with cts demand but no amenities or buildilngs
529
    are determined.
530
531
    Returns
532
    -------
533
    df_cells_only_cts_demand: gpd.GeoDataFrame
534
        Table of cells with cts demand but no amenities or buildings
535
    """
536
    from saio.openstreetmap import osm_amenities_shops_filtered
537
538
    # cells mit amenities
539
    with db.session_scope() as session:
540
        sub_query = (
541
            session.query(
542
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
543
            )
544
            .filter(
545
                func.st_within(
546
                    osm_amenities_shops_filtered.geom_amenity,
547
                    DestatisZensusPopulationPerHa.geom,
548
                )
549
            )
550
            .distinct(DestatisZensusPopulationPerHa.id)
551
        )
552
553
        cells_query = (
554
            session.query(
555
                EgonDemandRegioZensusElectricity.zensus_population_id,
556
                EgonDemandRegioZensusElectricity.scenario,
557
                EgonDemandRegioZensusElectricity.sector,
558
                EgonDemandRegioZensusElectricity.demand,
559
                DestatisZensusPopulationPerHa.geom,
560
            )
561
            .filter(
562
                EgonDemandRegioZensusElectricity.sector == "service",
563
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
564
            )
565
            .filter(
566
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
567
                    sub_query
568
                )
569
            )
570
            .filter(
571
                EgonDemandRegioZensusElectricity.zensus_population_id
572
                == DestatisZensusPopulationPerHa.id
573
            )
574
        )
575
576
    df_cts_cell_without_amenities = gpd.read_postgis(
577
        cells_query.statement,
578
        cells_query.session.bind,
579
        geom_col="geom",
580
        index_col=None,
581
    )
582
583
    # TODO maybe remove
584
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
585
        columns={"cell_id": "zensus_population_id"}
586
    )
587
588
    # Census cells with only cts demand
589
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
590
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
591
            df_buildings_without_amenities["zensus_population_id"].unique()
592
        )
593
    ]
594
595
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
596
597
    return df_cells_only_cts_demand
598
599
600
def calc_census_cell_share(scenario="eGon2035"):
601
    """
602
    The profile share for each census cell is calculated by it's
603
    share of annual demand per substation bus. The annual demand
604
    per cell is defined by DemandRegio. The share is for both
605
    scenarios identical as the annual demand is linearly scaled.
606
607
    Parameters
608
    ----------
609
    scenario: str
610
        Scenario for which the share is calculated.
611
612
    Returns
613
    -------
614
    df_census_share: pd.DataFrame
615
    """
616
617
    with db.session_scope() as session:
618
        cells_query = (
619
            session.query(
620
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
621
            )
622
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
623
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
624
            .filter(
625
                EgonDemandRegioZensusElectricity.zensus_population_id
626
                == MapZensusGridDistricts.zensus_population_id
627
            )
628
        )
629
630
    df_demand_regio_electricity_demand = pd.read_sql(
631
        cells_query.statement,
632
        cells_query.session.bind,
633
        index_col="zensus_population_id",
634
    )
635
636
    # get demand share of cell per bus
637
    df_census_share = df_demand_regio_electricity_demand[
638
        "demand"
639
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
640
        "demand"
641
    ].transform(
642
        "sum"
643
    )
644
    df_census_share = df_census_share.rename("cell_share")
645
646
    df_census_share = pd.concat(
647
        [
648
            df_census_share,
649
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
650
        ],
651
        axis=1,
652
    )
653
654
    df_census_share.reset_index(inplace=True)
655
    return df_census_share
656
657
658
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
659
    """
660
    Share of cts electricity demand profile per bus for every selected building
661
    is calculated. Building-amenity share is multiplied with census cell share
662
    to get the substation bus profile share for each building. The share is
663
    grouped and aggregated per building as some cover multiple cells.
664
665
    Parameters
666
    ----------
667
    df_cts_buildings: gpd.GeoDataFrame
668
        Table of all buildings with cts demand assigned
669
    scenario: str
670
        Scenario for which the share is calculated.
671
672
    Returns
673
    -------
674
    df_building_share: pd.DataFrame
675
        Table of bus profile share per building
676
677
    """
678
679
    def calc_building_amenity_share(df_cts_buildings):
680
        """
681
        Calculate the building share by the number amenities per building
682
        within a census cell.
683
        """
684
        df_building_amenity_share = df_cts_buildings[
685
            "n_amenities_inside"
686
        ] / df_cts_buildings.groupby("zensus_population_id")[
687
            "n_amenities_inside"
688
        ].transform(
689
            "sum"
690
        )
691
        df_building_amenity_share = pd.concat(
692
            [
693
                df_building_amenity_share.rename("building_amenity_share"),
694
                df_cts_buildings[["zensus_population_id", "id"]],
695
            ],
696
            axis=1,
697
        )
698
        return df_building_amenity_share
699
700
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
701
702
    df_census_cell_share = calc_census_cell_share(scenario)
703
704
    df_demand_share = pd.merge(
705
        left=df_building_amenity_share,
706
        right=df_census_cell_share,
707
        left_on="zensus_population_id",
708
        right_on="zensus_population_id",
709
    )
710
    df_demand_share["profile_share"] = df_demand_share[
711
        "building_amenity_share"
712
    ].multiply(df_demand_share["cell_share"])
713
714
    df_demand_share = df_demand_share[
715
        ["id", "bus_id", "scenario", "profile_share"]
716
    ]
717
    # Group and aggregate per building for multi cell buildings
718
    df_demand_share = (
719
        df_demand_share.groupby(["scenario", "id", "bus_id"])
720
        .sum()
721
        .reset_index()
722
    )
723
724
    return df_demand_share
725
726
727
def calc_building_profiles(
728
    df_demand_share=None,
729
    egon_building_id=None,
730
    bus_id=None,
731
    scenario="eGon2035",
732
):
733
    """
734
    Calculate the demand profile for each building. The profile is
735
    calculated by the demand share of the building per substation bus.
736
737
    Parameters
738
    ----------
739
    df_demand_share: pd.DataFrame
740
        Table of demand share per building. If not given, table is
741
        sourced from database.
742
    egon_building_id: int
743
        Id of the building for which the profile is calculated. If not
744
        given, the profiles are calculated for all buildings.
745
746
    Returns
747
    -------
748
    df_building_profiles: pd.DataFrame
749
        Table of demand profile per building
750
    """
751
752
    if not isinstance(df_demand_share, pd.DataFrame):
753
        with db.session_scope() as session:
754
            cells_query = session.query(
755
                EgonCtsElectricityDemandBuildingShare,
756
            ).filter(
757
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
758
            )
759
760
        df_demand_share = pd.read_sql(
761
            cells_query.statement, cells_query.session.bind, index_col=None
762
        )
763
    else:
764
        df_demand_share = df_demand_share.loc[
765
            df_demand_share["scenario"] == scenario
766
        ]
767
768
    df_cts_profiles = calc_load_curves_cts(scenario)
769
770
    # get demand share of selected building id
771
    if isinstance(egon_building_id, int):
772
        if egon_building_id in df_demand_share["id"]:
773
            df_demand_share = df_demand_share.loc[
774
                df_demand_share["id"] == egon_building_id
775
            ]
776
        else:
777
            raise KeyError(f"Building with id {egon_building_id} not found")
778
779
    # get demand share of all buildings for selected bus id
780
    if isinstance(bus_id, int):
781
        if bus_id in df_demand_share["bus_id"]:
782
            df_demand_share = df_demand_share.loc[
783
                df_demand_share["bus_id"] == bus_id
784
            ]
785
        else:
786
            raise KeyError(f"Bus with id {bus_id} not found")
787
788
    # get demand profile for all buildings for selected demand share
789
    df_building_profiles = pd.DataFrame()
790
    for bus_id, df in df_demand_share.groupby("bus_id"):
791
        shares = df.set_index("id", drop=True)["profile_share"]
792
        profile = df_cts_profiles.loc[:, bus_id]
793
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 790 is not entered. Are you sure this can never be the case?
Loading history...
794
        df_building_profiles = pd.concat(
795
            [df_building_profiles, building_profiles], axis=1
796
        )
797
798
    return df_building_profiles
799
800
801
def cts_to_buildings():
802
    """
803
    Assigns CTS demand to buildings and calculates the respective demand
804
    profiles. The demand profile per substation are disaggregated per
805
    annual demand share of each census cell and by the number of amenities
806
    per building within the cell. If no building data is available,
807
    synthetic buildings are generated around the amenities. If no amenities
808
    but cts demand is available, buildings are randomly selected. If no
809
    building nor amenity is available, random synthetic buildings are
810
    generated. The demand share is stored in the database.
811
812
    Note:
813
    -----
814
    Cells with CTS demand, amenities and buildings do not change within
815
    the scenarios, only the demand itself. Therefore scenario eGon2035
816
    can be used universally to determine the cts buildings but not for
817
    he demand share.
818
    """
819
820
    # Buildings with amenities
821
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
822
823
    # Remove synthetic CTS buildings if existing
824
    delete_synthetic_cts_buildings()
825
826
    # Create synthetic buildings for amenites without buildings
827
    df_amenities_without_buildings = amenities_without_buildings()
828
    if df_lost_cells.empty:
829
        df_amenities_without_buildings = df_amenities_without_buildings.append(
830
            df_lost_cells, ignore_index=True
831
        )
832
833
    df_amenities_without_buildings["n_amenities_inside"] = 1
834
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
835
        df_amenities_without_buildings, points="geom_amenity"
836
    )
837
838
    # TODO write to DB and remove renaming
839
    write_table_to_postgis(
840
        df_synthetic_buildings_with_amenities.rename(
841
            columns={
842
                "zensus_population_id": "cell_id",
843
                "egon_building_id": "id",
844
            }
845
        ),
846
        OsmBuildingsSynthetic,
847
        drop=False,
848
    )
849
850
    # Cells without amenities but CTS demand and buildings
851
    df_buildings_without_amenities = buildings_without_amenities()
852
853
    # TODO Fix Adhoc Bugfix duplicated buildings
854
    mask = df_buildings_without_amenities.loc[
855
        df_buildings_without_amenities["id"].isin(
856
            df_buildings_with_amenities["id"]
857
        )
858
    ].index
859
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
860
        index=mask
861
    ).reset_index(drop=True)
862
863
    df_buildings_without_amenities = select_cts_buildings(
864
        df_buildings_without_amenities
865
    )
866
    df_buildings_without_amenities["n_amenities_inside"] = 1
867
868
    # Create synthetic amenities and buildings in cells with only CTS demand
869
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
870
        df_buildings_without_amenities
871
    )
872
    # Only 1 Amenity per cell
873
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
874
    # Only 1 Amenity per Building
875
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
876
        df_cells_with_cts_demand_only, amenities=1
877
    )
878
    # Leads to only 1 building per cell
879
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
880
        df_cells_with_cts_demand_only, points="geom_point"
881
    )
882
883
    # TODO write to DB and remove renaming
884
    write_table_to_postgis(
885
        df_synthetic_buildings_without_amenities.rename(
886
            columns={
887
                "zensus_population_id": "cell_id",
888
                "egon_building_id": "id",
889
            }
890
        ),
891
        OsmBuildingsSynthetic,
892
        drop=False,
893
    )
894
895
    # Concat all buildings
896
    columns = [
897
        "zensus_population_id",
898
        "id",
899
        "geom_building",
900
        "n_amenities_inside",
901
        "source",
902
    ]
903
904
    df_buildings_with_amenities["source"] = "bwa"
905
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
906
    df_buildings_without_amenities["source"] = "bwoa"
907
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
908
909
    df_cts_buildings = pd.concat(
910
        [
911
            df_buildings_with_amenities[columns],
912
            df_synthetic_buildings_with_amenities[columns],
913
            df_buildings_without_amenities[columns],
914
            df_synthetic_buildings_without_amenities[columns],
915
        ],
916
        axis=0,
917
        ignore_index=True,
918
    )
919
    # TODO maybe remove after #772
920
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
921
922
    # Write table to db for debugging
923
    # TODO remove later
924
    df_cts_buildings = gpd.GeoDataFrame(
925
        df_cts_buildings, geometry="geom_building", crs=3035
926
    )
927
    df_cts_buildings = df_cts_buildings.reset_index().rename(
928
        columns={"index": "serial"}
929
    )
930
    write_table_to_postgis(
931
        df_cts_buildings,
932
        CtsBuildings,
933
        drop=True,
934
    )
935
936
    df_demand_share_2035 = calc_building_demand_profile_share(
937
        df_cts_buildings, scenario="eGon2035"
938
    )
939
    df_demand_share_100RE = calc_building_demand_profile_share(
940
        df_cts_buildings, scenario="eGon100RE"
941
    )
942
943
    df_demand_share = pd.concat(
944
        [df_demand_share_2035, df_demand_share_100RE],
945
        axis=0,
946
        ignore_index=True,
947
    )
948
949
    write_table_to_postgres(
950
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
951
    )
952
953
    return df_cts_buildings, df_demand_share
954
955
956
def get_peak_load_cts_buildings():
957
    """
958
    Get peak load of all CTS buildings for both scenarios and store in DB.
959
    """
960
961
962
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
963
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename("eGon2035")
964
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
965
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename("eGon100RE")
966
    df_peak_load = pd.concat(
967
        [df_peak_load_2035, df_peak_load_100RE], axis=1
968
    ).reset_index()
969
970
    df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
971
    df_peak_load["type"] = "cts"
972
    df_peak_load = df_peak_load.melt(
973
        id_vars=["building_id", "type"],
974
        var_name="scenario",
975
        value_name="peak_load_in_w",
976
    )
977
    # TODO Check units, maybe MwH?
978
    # Convert unit to W
979
    df_peak_load["peak_load_in_w"] = df_peak_load["peak_load_in_w"] * 1e6
980
    # Delete rows with cts demand
981
    with db.session_scope() as session:
982
        session.query(BuildingPeakLoads).filter(
983
            BuildingPeakLoads.type == "cts"
984
        ).delete()
985
986
    # Write peak loads into db
987
    with db.session_scope() as session:
988
        session.bulk_insert_mappings(
989
            BuildingPeakLoads,
990
            df_peak_load.to_dict(orient="records"),
991
        )
992
993
994
def delete_synthetic_cts_buildings():
995
    """
996
    All synthetic cts buildings are deleted from the DB. This is necessary if
997
    the task is run multiple times as the existing synthetic buildings
998
    influence the results.
999
    """
1000
    # import db tables
1001
    from saio.openstreetmap import osm_buildings_synthetic
1002
1003
    # cells mit amenities
1004
    with db.session_scope() as session:
1005
        session.query(osm_buildings_synthetic).filter(
1006
            osm_buildings_synthetic.building == "cts"
1007
        ).delete()
1008
1009
1010
class CtsElectricityBuildings(Dataset):
1011
    def __init__(self, dependencies):
1012
        super().__init__(
1013
            name="CtsElectricityBuildings",
1014
            version="0.0.0.",
1015
            dependencies=dependencies,
1016
            tasks=(
1017
                cts_to_buildings,
1018
                get_peak_load_cts_buildings,
1019
                # get_all_cts_building_profiles,
1020
            ),
1021
        )
1022