Passed
Pull Request — dev (#826)
by
unknown
02:14
created

data.datasets.electricity_demand_timeseries.cts_buildings   A

Complexity

Total Complexity 42

Size/Duplication

Total Lines 1033
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 42
eloc 524
dl 0
loc 1033
rs 9.0399
c 0
b 0
f 0

13 Functions

Rating   Name   Duplication   Size   Complexity  
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A amenities_without_buildings() 0 60 2
A delete_synthetic_cts_buildings() 0 14 2
A get_peak_load_cts_buildings() 0 35 3
C calc_building_profiles() 0 75 9
A select_cts_buildings() 0 23 1
B cells_with_cts_demand_only() 0 72 2
B cts_to_buildings() 0 157 2
A calc_building_demand_profile_share() 0 71 2
C buildings_with_amenities() 0 162 5
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    BuildingPeakLoads,
17
    OsmBuildingsSynthetic,
18
)
19
from egon.data.datasets.electricity_demand_timeseries.tools import (
20
    random_ints_until_sum,
21
    random_point_in_square,
22
    specific_int_until_sum,
23
    write_table_to_postgis,
24
    write_table_to_postgres,
25
)
26
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
27
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
28
import egon.data.config
29
30
engine = db.engine()
31
Base = declarative_base()
32
33
# import db tables
34
saio.register_schema("openstreetmap", engine=engine)
35
saio.register_schema("society", engine=engine)
36
saio.register_schema("demand", engine=engine)
37
saio.register_schema("boundaries", engine=engine)
38
39
40
class EgonCtsElectricityDemandBuildingShare(Base):
41
    __tablename__ = "egon_cts_electricity_demand_building_share"
42
    __table_args__ = {"schema": "demand"}
43
44
    serial = Column(Integer, primary_key=True)
45
    id = Column(Integer, index=True)
46
    scenario = Column(String, index=True)
47
    # id = Column(Integer, primary_key=True)
48
    # scenario = Column(String, primary_key=True)
49
    bus_id = Column(Integer, index=True)
50
    profile_share = Column(Float)
51
52
53
class CtsBuildings(Base):
54
    __tablename__ = "egon_cts_buildings"
55
    __table_args__ = {"schema": "openstreetmap"}
56
57
    serial = Column(Integer, primary_key=True)
58
    id = Column(Integer, index=True)
59
    zensus_population_id = Column(Integer, index=True)
60
    geom_building = Column(Geometry("Polygon", 3035))
61
    n_amenities_inside = Column(Integer)
62
    source = Column(String)
63
64
65
def amenities_without_buildings():
66
    """
67
    Amenities which have no buildings assigned and are in
68
    a cell with cts demand are determined.
69
70
    Returns
71
    -------
72
    pd.DataFrame
73
        Table of amenities without buildings
74
    """
75
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
76
77
    with db.session_scope() as session:
78
        cells_query = (
79
            session.query(
80
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
81
                # TODO can be used for square around amenity
82
                #  (1 geom_amenity: 1 geom_building)
83
                #  not unique amenity_ids yet
84
                osm_amenities_not_in_buildings_filtered.geom_amenity,
85
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
86
                # EgonDemandRegioZensusElectricity.demand,
87
                # # TODO can be used to generate n random buildings
88
                # # (n amenities : 1 randombuilding)
89
                # func.count(
90
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
91
                # ).label("n_amenities_inside"),
92
                # DestatisZensusPopulationPerHa.geom,
93
            )
94
            .filter(
95
                func.st_within(
96
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
97
                    DestatisZensusPopulationPerHa.geom,
98
                )
99
            )
100
            .filter(
101
                DestatisZensusPopulationPerHa.id
102
                == EgonDemandRegioZensusElectricity.zensus_population_id
103
            )
104
            .filter(
105
                EgonDemandRegioZensusElectricity.sector == "service",
106
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
107
                #         ).group_by(
108
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
109
                #             DestatisZensusPopulationPerHa.geom,
110
            )
111
        )
112
    # # TODO can be used to generate n random buildings
113
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
114
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
115
    # )
116
    #
117
118
    # # TODO can be used for square around amenity
119
    df_amenities_without_buildings = gpd.read_postgis(
120
        cells_query.statement,
121
        cells_query.session.bind,
122
        geom_col="geom_amenity",
123
    )
124
    return df_amenities_without_buildings
125
126
127
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
128
    """
129
    Building centroids are placed randomly within census cells.
130
    The Number of buildings is derived from n_amenity_inside, the selected
131
    method and number of amenities per building.
132
133
    Returns
134
    -------
135
    df: gpd.GeoDataFrame
136
        Table of buildings centroids
137
    """
138
    if isinstance(max_amenities, int):
139
        # amount of amenities is randomly generated within bounds
140
        # (max_amenities, amenities per cell)
141
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
142
            random_ints_until_sum, args=[max_amenities]
143
        )
144
    if isinstance(amenities, int):
145
        # Specific amount of amenities per building
146
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
147
            specific_int_until_sum, args=[amenities]
148
        )
149
150
    # Unnest each building
151
    df = df.explode(column="n_amenities_inside")
152
153
    # building count per cell
154
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
155
156
    # generate random synthetic buildings
157
    edge_length = 5
158
    # create random points within census cells
159
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
160
161
    df.reset_index(drop=True, inplace=True)
162
    # Store center of polygon
163
    df["geom_point"] = points
164
    # Drop geometry of census cell
165
    df = df.drop(columns=["geom"])
166
167
    return df
168
169
170
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
171
    """
172
    Synthetic buildings are generated around points.
173
174
    Parameters
175
    ----------
176
    df: pd.DataFrame
177
        Table of census cells
178
    points: gpd.GeoSeries or str
179
        List of points to place buildings around or column name of df
180
    crs: str
181
        CRS of result table
182
183
    Returns
184
    -------
185
    df: gpd.GeoDataFrame
186
        Synthetic buildings
187
    """
188
189
    if isinstance(points, str) and points in df.columns:
190
        points = df[points]
191
    elif isinstance(points, gpd.GeoSeries):
192
        pass
193
    else:
194
        raise ValueError("Points are of the wrong type")
195
196
    # Create building using a square around point
197
    edge_length = 5
198
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
199
200
    if "geom_point" not in df.columns:
201
        df["geom_point"] = df["geom_building"].centroid
202
203
    # TODO Check CRS
204
    df = gpd.GeoDataFrame(
205
        df,
206
        crs=crs,
207
        geometry="geom_building",
208
    )
209
210
    # TODO remove after implementation of egon_building_id
211
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
212
213
    # get max number of building ids from synthetic residential table
214
    with db.session_scope() as session:
215
        max_synth_residential_id = session.execute(
216
            func.max(OsmBuildingsSynthetic.id)
217
        ).scalar()
218
    max_synth_residential_id = int(max_synth_residential_id)
219
220
    # create sequential ids
221
    df["egon_building_id"] = range(
222
        max_synth_residential_id + 1,
223
        max_synth_residential_id + df.shape[0] + 1,
224
    )
225
226
    df["area"] = df["geom_building"].area
227
    # set building type of synthetic building
228
    df["building"] = "cts"
229
    # TODO remove in #772
230
    df = df.rename(
231
        columns={
232
            # "zensus_population_id": "cell_id",
233
            "egon_building_id": "id",
234
        }
235
    )
236
    return df
237
238
239
def buildings_with_amenities():
240
    """
241
    Amenities which are assigned to buildings are determined
242
    and grouped per building and zensus cell. Buildings
243
    covering multiple cells therefore exists multiple times
244
    but in different zensus cells. This is necessary to cover
245
    all cells with a cts demand. If buildings exist in multiple
246
    substations, their amenities are summed and assigned and kept in
247
    one substation only. If as a result, a census cell is uncovered,
248
    a synthetic amenity is placed. The buildings are aggregated
249
    afterwards during the calculation of the profile_share.
250
251
    Returns
252
    -------
253
    df_buildings_with_amenities: gpd.GeoDataFrame
254
        Contains all buildings with amenities per zensus cell.
255
    df_lost_cells: gpd.GeoDataFrame
256
        Contains synthetic amenities in lost cells. Might be empty
257
    """
258
259
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
260
261
    with db.session_scope() as session:
262
        cells_query = (
263
            session.query(
264
                osm_amenities_in_buildings_filtered,
265
                MapZensusGridDistricts.bus_id,
266
            )
267
            .filter(
268
                MapZensusGridDistricts.zensus_population_id
269
                == osm_amenities_in_buildings_filtered.zensus_population_id
270
            )
271
            .filter(
272
                EgonDemandRegioZensusElectricity.zensus_population_id
273
                == osm_amenities_in_buildings_filtered.zensus_population_id
274
            )
275
            .filter(
276
                EgonDemandRegioZensusElectricity.sector == "service",
277
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
278
            )
279
        )
280
    df_amenities_in_buildings = pd.read_sql(
281
        cells_query.statement, cells_query.session.bind, index_col=None
282
    )
283
284
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
285
        "geom_building"
286
    ].apply(to_shape)
287
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
288
        "geom_amenity"
289
    ].apply(to_shape)
290
291
    df_amenities_in_buildings["n_amenities_inside"] = 1
292
293
    # add identifier column for buildings in multiple substations
294
    df_amenities_in_buildings[
295
        "duplicate_identifier"
296
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
297
        "n_amenities_inside"
298
    ].transform(
299
        "cumsum"
300
    )
301
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
302
        ["id", "duplicate_identifier"]
303
    )
304
    # sum amenities of buildings with multiple substations
305
    df_amenities_in_buildings[
306
        "n_amenities_inside"
307
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
308
        "n_amenities_inside"
309
    ].transform(
310
        "sum"
311
    )
312
    # identify lost zensus cells
313
    df_lost_cells = df_amenities_in_buildings.loc[
314
        df_amenities_in_buildings.duplicated(
315
            subset=["id", "duplicate_identifier"], keep="first"
316
        )
317
    ]["zensus_population_id"]
318
319
    # drop duplicated buildings
320
    df_amenities_in_buildings.drop_duplicates(
321
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
322
    )
323
324
    # check if lost zensus cells are already covered
325
    if not df_lost_cells.empty:
326
        if not (
327
            df_amenities_in_buildings["zensus_population_id"]
328
            .isin(df_lost_cells)
329
            .empty
330
        ):
331
            # query geom data for cell if not
332
            with db.session_scope() as session:
333
                cells_query = session.query(
334
                    DestatisZensusPopulationPerHa.id,
335
                    DestatisZensusPopulationPerHa.geom,
336
                ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
337
338
            df_lost_cells = gpd.read_postgis(
339
                cells_query.statement,
340
                cells_query.session.bind,
341
                geom_col="geom",
342
            )
343
            # TODO maybe adapt method
344
            # place random amenity in cell
345
            df_lost_cells["n_amenities_inside"] = 1
346
            df_lost_cells.rename(
347
                columns={
348
                    "id": "zensus_population_id",
349
                },
350
                inplace=True,
351
            )
352
            df_lost_cells = place_buildings_with_amenities(
353
                df_lost_cells, amenities=1
354
            )
355
            df_lost_cells.rename(
356
                columns={
357
                    # "id": "zensus_population_id",
358
                    "geom_point": "geom_amenity",
359
                },
360
                inplace=True,
361
            )
362
            df_lost_cells.drop(
363
                columns=["building_count", "n_amenities_inside"], inplace=True
364
            )
365
        else:
366
            df_lost_cells = None
367
    else:
368
        df_lost_cells = None
369
370
    # drop helper columns
371
    df_amenities_in_buildings.drop(
372
        columns=["duplicate_identifier"], inplace=True
373
    )
374
375
    # sum amenities per building and cell
376
    df_amenities_in_buildings[
377
        "n_amenities_inside"
378
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
379
        "n_amenities_inside"
380
    ].transform(
381
        "sum"
382
    )
383
    # drop duplicated buildings
384
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
385
        ["id", "zensus_population_id"]
386
    )
387
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
388
389
    df_buildings_with_amenities = df_buildings_with_amenities[
390
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
391
    ]
392
    df_buildings_with_amenities.rename(
393
        columns={
394
            # "zensus_population_id": "cell_id",
395
            "egon_building_id": "id"
396
        },
397
        inplace=True,
398
    )
399
400
    return df_buildings_with_amenities, df_lost_cells
401
402
403
def buildings_without_amenities():
404
    """
405
    Buildings (filtered and synthetic) in cells with
406
    cts demand but no amenities are determined.
407
408
    Returns
409
    -------
410
    df_buildings_without_amenities: gpd.GeoDataFrame
411
        Table of buildings without amenities in zensus cells
412
        with cts demand.
413
    """
414
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
415
    from saio.openstreetmap import (
416
        osm_amenities_shops_filtered,
417
        osm_buildings_filtered,
418
        osm_buildings_synthetic,
419
    )
420
421
    # buildings_filtered in cts-demand-cells without amenities
422
    with db.session_scope() as session:
423
424
        # Synthetic Buildings
425
        q_synth_buildings = session.query(
426
            osm_buildings_synthetic.cell_id.cast(Integer).label(
427
                "zensus_population_id"
428
            ),
429
            osm_buildings_synthetic.id.cast(Integer).label("id"),
430
            osm_buildings_synthetic.area.label("area"),
431
            osm_buildings_synthetic.geom_building.label("geom_building"),
432
            osm_buildings_synthetic.geom_point.label("geom_point"),
433
        )
434
435
        # Buildings filtered
436
        q_buildings_filtered = session.query(
437
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
438
            osm_buildings_filtered.id,
439
            osm_buildings_filtered.area,
440
            osm_buildings_filtered.geom_building,
441
            osm_buildings_filtered.geom_point,
442
        ).filter(
443
            osm_buildings_filtered.id
444
            == egon_map_zensus_buildings_filtered_all.id
445
        )
446
447
        # Amenities + zensus_population_id
448
        q_amenities = (
449
            session.query(
450
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
451
            )
452
            .filter(
453
                func.st_within(
454
                    osm_amenities_shops_filtered.geom_amenity,
455
                    DestatisZensusPopulationPerHa.geom,
456
                )
457
            )
458
            .distinct(DestatisZensusPopulationPerHa.id)
459
        )
460
461
        # Cells with CTS demand but without amenities
462
        q_cts_without_amenities = (
463
            session.query(
464
                EgonDemandRegioZensusElectricity.zensus_population_id,
465
            )
466
            .filter(
467
                EgonDemandRegioZensusElectricity.sector == "service",
468
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
469
            )
470
            .filter(
471
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
472
                    q_amenities
473
                )
474
            )
475
            .distinct()
476
        )
477
478
        # Buildings filtered + synthetic buildings residential in
479
        # cells with CTS demand but without amenities
480
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
481
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
482
                q_cts_without_amenities
483
            )
484
        )
485
486
    # df_buildings_without_amenities = pd.read_sql(
487
    #     cells_query.statement, cells_query.session.bind, index_col=None)
488
    df_buildings_without_amenities = gpd.read_postgis(
489
        cells_query.statement,
490
        cells_query.session.bind,
491
        geom_col="geom_building",
492
    )
493
494
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
495
        columns={
496
            # "zensus_population_id": "cell_id",
497
            "egon_building_id": "id",
498
        }
499
    )
500
501
    return df_buildings_without_amenities
502
503
504
def select_cts_buildings(df_buildings_wo_amenities):
505
    """
506
    Buildings (filtered and synthetic) in cells with
507
    cts demand are selected. Only the first building
508
    is taken for each cell and 1 amenities is assigned.
509
510
    Returns
511
    -------
512
    df_buildings_with_cts_demand: gpd.GeoDataFrame
513
        Table of buildings
514
    """
515
    # TODO Adapt method
516
    # Select one building each cell
517
    # take the first
518
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
519
        # subset="cell_id", keep="first"
520
        subset="zensus_population_id",
521
        keep="first",
522
    ).reset_index(drop=True)
523
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
524
    df_buildings_with_cts_demand["building"] = "cts"
525
526
    return df_buildings_with_cts_demand
527
528
529
def cells_with_cts_demand_only(df_buildings_without_amenities):
530
    """
531
    Cells with cts demand but no amenities or buildilngs
532
    are determined.
533
534
    Returns
535
    -------
536
    df_cells_only_cts_demand: gpd.GeoDataFrame
537
        Table of cells with cts demand but no amenities or buildings
538
    """
539
    from saio.openstreetmap import osm_amenities_shops_filtered
540
541
    # cells mit amenities
542
    with db.session_scope() as session:
543
        sub_query = (
544
            session.query(
545
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
546
            )
547
            .filter(
548
                func.st_within(
549
                    osm_amenities_shops_filtered.geom_amenity,
550
                    DestatisZensusPopulationPerHa.geom,
551
                )
552
            )
553
            .distinct(DestatisZensusPopulationPerHa.id)
554
        )
555
556
        cells_query = (
557
            session.query(
558
                EgonDemandRegioZensusElectricity.zensus_population_id,
559
                EgonDemandRegioZensusElectricity.scenario,
560
                EgonDemandRegioZensusElectricity.sector,
561
                EgonDemandRegioZensusElectricity.demand,
562
                DestatisZensusPopulationPerHa.geom,
563
            )
564
            .filter(
565
                EgonDemandRegioZensusElectricity.sector == "service",
566
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
567
            )
568
            .filter(
569
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
570
                    sub_query
571
                )
572
            )
573
            .filter(
574
                EgonDemandRegioZensusElectricity.zensus_population_id
575
                == DestatisZensusPopulationPerHa.id
576
            )
577
        )
578
579
    df_cts_cell_without_amenities = gpd.read_postgis(
580
        cells_query.statement,
581
        cells_query.session.bind,
582
        geom_col="geom",
583
        index_col=None,
584
    )
585
586
    # TODO maybe remove
587
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
588
        columns={"cell_id": "zensus_population_id"}
589
    )
590
591
    # Census cells with only cts demand
592
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
593
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
594
            df_buildings_without_amenities["zensus_population_id"].unique()
595
        )
596
    ]
597
598
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
599
600
    return df_cells_only_cts_demand
601
602
603
def calc_census_cell_share(scenario="eGon2035"):
604
    """
605
    The profile share for each census cell is calculated by it's
606
    share of annual demand per substation bus. The annual demand
607
    per cell is defined by DemandRegio. The share is for both
608
    scenarios identical as the annual demand is linearly scaled.
609
610
    Parameters
611
    ----------
612
    scenario: str
613
        Scenario for which the share is calculated.
614
615
    Returns
616
    -------
617
    df_census_share: pd.DataFrame
618
    """
619
620
    with db.session_scope() as session:
621
        cells_query = (
622
            session.query(
623
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
624
            )
625
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
626
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
627
            .filter(
628
                EgonDemandRegioZensusElectricity.zensus_population_id
629
                == MapZensusGridDistricts.zensus_population_id
630
            )
631
        )
632
633
    df_demand_regio_electricity_demand = pd.read_sql(
634
        cells_query.statement,
635
        cells_query.session.bind,
636
        index_col="zensus_population_id",
637
    )
638
639
    # get demand share of cell per bus
640
    df_census_share = df_demand_regio_electricity_demand[
641
        "demand"
642
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
643
        "demand"
644
    ].transform(
645
        "sum"
646
    )
647
    df_census_share = df_census_share.rename("cell_share")
648
649
    df_census_share = pd.concat(
650
        [
651
            df_census_share,
652
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
653
        ],
654
        axis=1,
655
    )
656
657
    df_census_share.reset_index(inplace=True)
658
    return df_census_share
659
660
661
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
662
    """
663
    Share of cts electricity demand profile per bus for every selected building
664
    is calculated. Building-amenity share is multiplied with census cell share
665
    to get the substation bus profile share for each building. The share is
666
    grouped and aggregated per building as some cover multiple cells.
667
668
    Parameters
669
    ----------
670
    df_cts_buildings: gpd.GeoDataFrame
671
        Table of all buildings with cts demand assigned
672
    scenario: str
673
        Scenario for which the share is calculated.
674
675
    Returns
676
    -------
677
    df_building_share: pd.DataFrame
678
        Table of bus profile share per building
679
680
    """
681
682
    def calc_building_amenity_share(df_cts_buildings):
683
        """
684
        Calculate the building share by the number amenities per building
685
        within a census cell.
686
        """
687
        df_building_amenity_share = df_cts_buildings[
688
            "n_amenities_inside"
689
        ] / df_cts_buildings.groupby("zensus_population_id")[
690
            "n_amenities_inside"
691
        ].transform(
692
            "sum"
693
        )
694
        df_building_amenity_share = pd.concat(
695
            [
696
                df_building_amenity_share.rename("building_amenity_share"),
697
                df_cts_buildings[["zensus_population_id", "id"]],
698
            ],
699
            axis=1,
700
        )
701
        return df_building_amenity_share
702
703
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
704
705
    df_census_cell_share = calc_census_cell_share(scenario)
706
707
    df_demand_share = pd.merge(
708
        left=df_building_amenity_share,
709
        right=df_census_cell_share,
710
        left_on="zensus_population_id",
711
        right_on="zensus_population_id",
712
    )
713
    df_demand_share["profile_share"] = df_demand_share[
714
        "building_amenity_share"
715
    ].multiply(df_demand_share["cell_share"])
716
717
    df_demand_share = df_demand_share[
718
        ["id", "bus_id", "scenario", "profile_share"]
719
    ]
720
    # TODO adapt groupby?
721
    # Group and aggregate per building for multi cell buildings
722
    df_demand_share = (
723
        df_demand_share.groupby(["scenario", "id", "bus_id"])
724
        .sum()
725
        .reset_index()
726
    )
727
    if df_demand_share.duplicated("id", keep=False).any():
728
        print(
729
            df_demand_share.loc[df_demand_share.duplicated("id", keep=False)]
730
        )
731
    return df_demand_share
732
733
734
def calc_building_profiles(
735
    df_demand_share=None,
736
    egon_building_id=None,
737
    bus_id=None,
738
    scenario="eGon2035",
739
):
740
    """
741
    Calculate the demand profile for each building. The profile is
742
    calculated by the demand share of the building per substation bus.
743
744
    Parameters
745
    ----------
746
    df_demand_share: pd.DataFrame
747
        Table of demand share per building. If not given, table is
748
        sourced from database.
749
    egon_building_id: int
750
        Id of the building for which the profile is calculated. If not
751
        given, the profiles are calculated for all buildings.
752
753
    Returns
754
    -------
755
    df_building_profiles: pd.DataFrame
756
        Table of demand profile per building
757
    """
758
759
    if not isinstance(df_demand_share, pd.DataFrame):
760
        with db.session_scope() as session:
761
            cells_query = session.query(
762
                EgonCtsElectricityDemandBuildingShare,
763
            ).filter(
764
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
765
            )
766
767
        df_demand_share = pd.read_sql(
768
            cells_query.statement, cells_query.session.bind, index_col=None
769
        )
770
    else:
771
        df_demand_share = df_demand_share.loc[
772
            df_demand_share["scenario"] == scenario
773
        ]
774
775
    # TODO workaround
776
    df_demand_share = df_demand_share.drop(columns="serial")
777
778
    df_cts_profiles = calc_load_curves_cts(scenario)
779
780
    # get demand share of selected building id
781
    if isinstance(egon_building_id, int):
782
        if egon_building_id in df_demand_share["id"]:
783
            df_demand_share = df_demand_share.loc[
784
                df_demand_share["id"] == egon_building_id
785
            ]
786
        else:
787
            raise KeyError(f"Building with id {egon_building_id} not found")
788
789
    # get demand share of all buildings for selected bus id
790
    if isinstance(bus_id, int):
791
        if bus_id in df_demand_share["bus_id"]:
792
            df_demand_share = df_demand_share.loc[
793
                df_demand_share["bus_id"] == bus_id
794
            ]
795
        else:
796
            raise KeyError(f"Bus with id {bus_id} not found")
797
798
    # get demand profile for all buildings for selected demand share
799
    df_building_profiles = pd.DataFrame()
800
    for bus_id, df in df_demand_share.groupby("bus_id"):
801
        shares = df.set_index("id", drop=True)["profile_share"]
802
        profile = df_cts_profiles.loc[:, bus_id]
803
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 800 is not entered. Are you sure this can never be the case?
Loading history...
804
        df_building_profiles = pd.concat(
805
            [df_building_profiles, building_profiles], axis=1
806
        )
807
808
    return df_building_profiles
809
810
811
def cts_to_buildings():
812
    """
813
    Assigns CTS demand to buildings and calculates the respective demand
814
    profiles. The demand profile per substation are disaggregated per
815
    annual demand share of each census cell and by the number of amenities
816
    per building within the cell. If no building data is available,
817
    synthetic buildings are generated around the amenities. If no amenities
818
    but cts demand is available, buildings are randomly selected. If no
819
    building nor amenity is available, random synthetic buildings are
820
    generated. The demand share is stored in the database.
821
822
    Note:
823
    -----
824
    Cells with CTS demand, amenities and buildings do not change within
825
    the scenarios, only the demand itself. Therefore scenario eGon2035
826
    can be used universally to determine the cts buildings but not for
827
    he demand share.
828
    """
829
830
    # Buildings with amenities
831
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
832
833
    # Remove synthetic CTS buildings if existing
834
    delete_synthetic_cts_buildings()
835
836
    # Create synthetic buildings for amenites without buildings
837
    df_amenities_without_buildings = amenities_without_buildings()
838
    if df_lost_cells.empty:
839
        df_amenities_without_buildings = df_amenities_without_buildings.append(
840
            df_lost_cells, ignore_index=True
841
        )
842
843
    df_amenities_without_buildings["n_amenities_inside"] = 1
844
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
845
        df_amenities_without_buildings, points="geom_amenity"
846
    )
847
848
    # TODO write to DB and remove renaming
849
    write_table_to_postgis(
850
        df_synthetic_buildings_with_amenities.rename(
851
            columns={
852
                "zensus_population_id": "cell_id",
853
                "egon_building_id": "id",
854
            }
855
        ),
856
        OsmBuildingsSynthetic,
857
        drop=False,
858
    )
859
860
    # Cells without amenities but CTS demand and buildings
861
    df_buildings_without_amenities = buildings_without_amenities()
862
863
    # TODO Fix Adhoc Bugfix duplicated buildings
864
    mask = df_buildings_without_amenities.loc[
865
        df_buildings_without_amenities["id"].isin(
866
            df_buildings_with_amenities["id"]
867
        )
868
    ].index
869
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
870
        index=mask
871
    ).reset_index(drop=True)
872
873
    df_buildings_without_amenities = select_cts_buildings(
874
        df_buildings_without_amenities
875
    )
876
    df_buildings_without_amenities["n_amenities_inside"] = 1
877
878
    # Create synthetic amenities and buildings in cells with only CTS demand
879
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
880
        df_buildings_without_amenities
881
    )
882
    # Only 1 Amenity per cell
883
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
884
    # Only 1 Amenity per Building
885
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
886
        df_cells_with_cts_demand_only, amenities=1
887
    )
888
    # Leads to only 1 building per cell
889
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
890
        df_cells_with_cts_demand_only, points="geom_point"
891
    )
892
893
    # TODO write to DB and remove (backup) renaming
894
    write_table_to_postgis(
895
        df_synthetic_buildings_without_amenities.rename(
896
            columns={
897
                "zensus_population_id": "cell_id",
898
                "egon_building_id": "id",
899
            }
900
        ),
901
        OsmBuildingsSynthetic,
902
        drop=False,
903
    )
904
905
    # Concat all buildings
906
    columns = [
907
        "zensus_population_id",
908
        "id",
909
        "geom_building",
910
        "n_amenities_inside",
911
        "source",
912
    ]
913
914
    df_buildings_with_amenities["source"] = "bwa"
915
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
916
    df_buildings_without_amenities["source"] = "bwoa"
917
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
918
919
    df_cts_buildings = pd.concat(
920
        [
921
            df_buildings_with_amenities[columns],
922
            df_synthetic_buildings_with_amenities[columns],
923
            df_buildings_without_amenities[columns],
924
            df_synthetic_buildings_without_amenities[columns],
925
        ],
926
        axis=0,
927
        ignore_index=True,
928
    )
929
    # TODO maybe remove after #772
930
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
931
932
    # Write table to db for debugging
933
    # TODO remove later
934
    df_cts_buildings = gpd.GeoDataFrame(
935
        df_cts_buildings, geometry="geom_building", crs=3035
936
    )
937
    df_cts_buildings = df_cts_buildings.reset_index().rename(
938
        columns={"index": "serial"}
939
    )
940
    write_table_to_postgis(
941
        df_cts_buildings,
942
        CtsBuildings,
943
        drop=True,
944
    )
945
946
    df_demand_share_2035 = calc_building_demand_profile_share(
947
        df_cts_buildings, scenario="eGon2035"
948
    )
949
    df_demand_share_100RE = calc_building_demand_profile_share(
950
        df_cts_buildings, scenario="eGon100RE"
951
    )
952
953
    df_demand_share = pd.concat(
954
        [df_demand_share_2035, df_demand_share_100RE],
955
        axis=0,
956
        ignore_index=True,
957
    )
958
    # TODO workaround
959
    df_demand_share = df_demand_share.reset_index().rename(
960
        columns={"index": "serial"}
961
    )
962
963
    write_table_to_postgres(
964
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
965
    )
966
967
    return df_cts_buildings, df_demand_share
968
969
970
def get_peak_load_cts_buildings():
971
    """
972
    Get peak load of all CTS buildings for both scenarios and store in DB.
973
    """
974
975
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
976
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename("eGon2035")
977
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
978
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename("eGon100RE")
979
    df_peak_load = pd.concat(
980
        [df_peak_load_2035, df_peak_load_100RE], axis=1
981
    ).reset_index()
982
983
    # TODO rename table column to egon_building_id
984
    df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
985
    df_peak_load["type"] = "cts"
986
    df_peak_load = df_peak_load.melt(
987
        id_vars=["building_id", "type"],
988
        var_name="scenario",
989
        value_name="peak_load_in_w",
990
    )
991
    # TODO Check units, maybe MwH?
992
    # Convert unit to W
993
    df_peak_load["peak_load_in_w"] = df_peak_load["peak_load_in_w"] * 1e6
994
    # Delete rows with cts demand
995
    with db.session_scope() as session:
996
        session.query(BuildingPeakLoads).filter(
997
            BuildingPeakLoads.type == "cts"
998
        ).delete()
999
1000
    # Write peak loads into db
1001
    with db.session_scope() as session:
1002
        session.bulk_insert_mappings(
1003
            BuildingPeakLoads,
1004
            df_peak_load.to_dict(orient="records"),
1005
        )
1006
1007
1008
def delete_synthetic_cts_buildings():
1009
    """
1010
    All synthetic cts buildings are deleted from the DB. This is necessary if
1011
    the task is run multiple times as the existing synthetic buildings
1012
    influence the results.
1013
    """
1014
    # import db tables
1015
    from saio.openstreetmap import osm_buildings_synthetic
1016
1017
    # cells mit amenities
1018
    with db.session_scope() as session:
1019
        session.query(osm_buildings_synthetic).filter(
1020
            osm_buildings_synthetic.building == "cts"
1021
        ).delete()
1022
1023
1024
class CtsElectricityBuildings(Dataset):
1025
    def __init__(self, dependencies):
1026
        super().__init__(
1027
            name="CtsElectricityBuildings",
1028
            version="0.0.0",
1029
            dependencies=dependencies,
1030
            tasks=(
1031
                cts_to_buildings,
1032
                get_peak_load_cts_buildings,
1033
                # get_all_cts_building_profiles,
1034
            ),
1035
        )
1036