Passed
Pull Request — dev (#826)
by
unknown
01:41
created

data.datasets.electricity_demand_timeseries.cts_buildings   B

Complexity

Total Complexity 43

Size/Duplication

Total Lines 1051
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 43
eloc 536
dl 0
loc 1051
rs 8.96
c 0
b 0
f 0

14 Functions

Rating   Name   Duplication   Size   Complexity  
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A amenities_without_buildings() 0 60 2
C calc_building_profiles() 0 72 9
A select_cts_buildings() 0 23 1
B cells_with_cts_demand_only() 0 72 2
A write_synthetic_buildings_to_db() 0 31 2
B cts_to_buildings() 0 155 2
A calc_building_demand_profile_share() 0 67 1
C buildings_with_amenities() 0 156 5
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2
A delete_synthetic_cts_buildings() 0 14 2
A get_peak_load_cts_buildings() 0 33 3

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    BuildingPeakLoads,
17
    OsmBuildingsSynthetic,
18
)
19
from egon.data.datasets.electricity_demand_timeseries.tools import (
20
    random_ints_until_sum,
21
    random_point_in_square,
22
    specific_int_until_sum,
23
    write_table_to_postgis,
24
    write_table_to_postgres,
25
)
26
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
27
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
28
import egon.data.config
29
30
engine = db.engine()
31
Base = declarative_base()
32
33
data_config = egon.data.config.datasets()
34
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
35
36
# import db tables
37
saio.register_schema("openstreetmap", engine=engine)
38
saio.register_schema("society", engine=engine)
39
saio.register_schema("demand", engine=engine)
40
saio.register_schema("boundaries", engine=engine)
41
42
43
class EgonCtsElectricityDemandBuildingShare(Base):
44
    __tablename__ = "egon_cts_electricity_demand_building_share"
45
    __table_args__ = {"schema": "demand"}
46
47
    id = Column(Integer, primary_key=True)
48
    scenario = Column(String, primary_key=True)
49
    bus_id = Column(Integer, index=True)
50
    profile_share = Column(Float)
51
52
53
class CtsBuildings(Base):
54
    __tablename__ = "egon_cts_buildings"
55
    __table_args__ = {"schema": "openstreetmap"}
56
57
    serial = Column(Integer, primary_key=True)
58
    id = Column(Integer)
59
    zensus_population_id = Column(Integer, index=True)
60
    geom_building = Column(Geometry("Polygon", 3035))
61
    n_amenities_inside = Column(Integer)
62
    source = Column(String)
63
64
65
def amenities_without_buildings():
66
    """
67
    Amenities which have no buildings assigned and are in
68
    a cell with cts demand are determined.
69
70
    Returns
71
    -------
72
    pd.DataFrame
73
        Table of amenities without buildings
74
    """
75
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
76
77
    with db.session_scope() as session:
78
        cells_query = (
79
            session.query(
80
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
81
                # TODO can be used for square around amenity
82
                #  (1 geom_amenity: 1 geom_building)
83
                #  not unique amenity_ids yet
84
                osm_amenities_not_in_buildings_filtered.geom_amenity,
85
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
86
                # EgonDemandRegioZensusElectricity.demand,
87
                # # TODO can be used to generate n random buildings
88
                # # (n amenities : 1 randombuilding)
89
                # func.count(
90
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
91
                # ).label("n_amenities_inside"),
92
                # DestatisZensusPopulationPerHa.geom,
93
            )
94
            .filter(
95
                func.st_within(
96
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
97
                    DestatisZensusPopulationPerHa.geom,
98
                )
99
            )
100
            .filter(
101
                DestatisZensusPopulationPerHa.id
102
                == EgonDemandRegioZensusElectricity.zensus_population_id
103
            )
104
            .filter(
105
                EgonDemandRegioZensusElectricity.sector == "service",
106
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
107
                #         ).group_by(
108
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
109
                #             DestatisZensusPopulationPerHa.geom,
110
            )
111
        )
112
    # # TODO can be used to generate n random buildings
113
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
114
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
115
    # )
116
    #
117
118
    # # TODO can be used for square around amenity
119
    df_amenities_without_buildings = gpd.read_postgis(
120
        cells_query.statement,
121
        cells_query.session.bind,
122
        geom_col="geom_amenity",
123
    )
124
    return df_amenities_without_buildings
125
126
127
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
128
    """
129
    Building centroids are placed randomly within census cells.
130
    The Number of buildings is derived from n_amenity_inside, the selected
131
    method and number of amenities per building.
132
133
    Returns
134
    -------
135
    df: gpd.GeoDataFrame
136
        Table of buildings centroids
137
    """
138
    if isinstance(max_amenities, int):
139
        # amount of amenities is randomly generated within bounds
140
        # (max_amenities, amenities per cell)
141
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
142
            random_ints_until_sum, args=[max_amenities]
143
        )
144
    if isinstance(amenities, int):
145
        # Specific amount of amenities per building
146
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
147
            specific_int_until_sum, args=[amenities]
148
        )
149
150
    # Unnest each building
151
    df = df.explode(column="n_amenities_inside")
152
153
    # building count per cell
154
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
155
156
    # generate random synthetic buildings
157
    edge_length = 5
158
    # create random points within census cells
159
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
160
161
    df.reset_index(drop=True, inplace=True)
162
    # Store center of polygon
163
    df["geom_point"] = points
164
    # Drop geometry of census cell
165
    df = df.drop(columns=["geom"])
166
167
    return df
168
169
170
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
171
    """
172
    Synthetic buildings are generated around points.
173
174
    Parameters
175
    ----------
176
    df: pd.DataFrame
177
        Table of census cells
178
    points: gpd.GeoSeries or str
179
        List of points to place buildings around or column name of df
180
    crs: str
181
        CRS of result table
182
183
    Returns
184
    -------
185
    df: gpd.GeoDataFrame
186
        Synthetic buildings
187
    """
188
189
    if isinstance(points, str) and points in df.columns:
190
        points = df[points]
191
    elif isinstance(points, gpd.GeoSeries):
192
        pass
193
    else:
194
        raise ValueError("Points are of the wrong type")
195
196
    # Create building using a square around point
197
    edge_length = 5
198
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
199
200
    if "geom_point" not in df.columns:
201
        df["geom_point"] = df["geom_building"].centroid
202
203
    # TODO Check CRS
204
    df = gpd.GeoDataFrame(
205
        df,
206
        crs=crs,
207
        geometry="geom_building",
208
    )
209
210
    # TODO remove after implementation of egon_building_id
211
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
212
213
    # get max number of building ids from synthetic residential table
214
    with db.session_scope() as session:
215
        max_synth_residential_id = session.execute(
216
            func.max(OsmBuildingsSynthetic.id)
217
        ).scalar()
218
    max_synth_residential_id = int(max_synth_residential_id)
219
220
    # create sequential ids
221
    df["egon_building_id"] = range(
222
        max_synth_residential_id + 1,
223
        max_synth_residential_id + df.shape[0] + 1,
224
    )
225
226
    df["area"] = df["geom_building"].area
227
    # set building type of synthetic building
228
    df["building"] = "cts"
229
    # TODO remove in #772
230
    df = df.rename(
231
        columns={
232
            # "zensus_population_id": "cell_id",
233
            "egon_building_id": "id",
234
        }
235
    )
236
    return df
237
238
239
def buildings_with_amenities():
240
    """
241
    Amenities which are assigned to buildings are determined
242
    and grouped per building and zensus cell. Buildings
243
    covering multiple cells therefore exists multiple times
244
    but in different zensus cells. This is necessary to cover
245
    all cells with a cts demand. If buildings exist in multiple
246
    substations, their amenities are summed and assigned and kept in
247
    one substation only. If as a result, a census cell is uncovered,
248
    a synthetic amenity is placed. The buildings are aggregated
249
    afterwards during the calculation of the profile_share.
250
251
    Returns
252
    -------
253
    df_buildings_with_amenities: gpd.GeoDataFrame
254
        Contains all buildings with amenities per zensus cell.
255
    df_lost_cells: gpd.GeoDataFrame
256
        Contains synthetic amenities in lost cells. Might be empty
257
    """
258
259
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
260
261
    with db.session_scope() as session:
262
        cells_query = (
263
            session.query(
264
                osm_amenities_in_buildings_filtered,
265
                MapZensusGridDistricts.bus_id,
266
            )
267
            .filter(
268
                MapZensusGridDistricts.zensus_population_id
269
                == osm_amenities_in_buildings_filtered.zensus_population_id
270
            )
271
            .filter(
272
                EgonDemandRegioZensusElectricity.zensus_population_id
273
                == osm_amenities_in_buildings_filtered.zensus_population_id
274
            )
275
            .filter(
276
                EgonDemandRegioZensusElectricity.sector == "service",
277
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
278
            )
279
        )
280
    df_amenities_in_buildings = pd.read_sql(
281
        cells_query.statement, cells_query.session.bind, index_col=None
282
    )
283
284
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
285
        "geom_building"
286
    ].apply(to_shape)
287
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
288
        "geom_amenity"
289
    ].apply(to_shape)
290
291
    df_amenities_in_buildings["n_amenities_inside"] = 1
292
293
    # add identifier column for buildings in multiple substations
294
    df_amenities_in_buildings[
295
        "duplicate_identifier"
296
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
297
        "n_amenities_inside"
298
    ].transform(
299
        "cumsum"
300
    )
301
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
302
        ["id", "duplicate_identifier"]
303
    )
304
    # sum amenities of buildings with multiple substations
305
    df_amenities_in_buildings[
306
        "n_amenities_inside"
307
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
308
        "n_amenities_inside"
309
    ].transform(
310
        "sum"
311
    )
312
    # identify lost zensus cells
313
    df_lost_cells = df_amenities_in_buildings.loc[
314
        df_amenities_in_buildings.duplicated(
315
            subset=["id", "duplicate_identifier"], keep="first"
316
        )
317
    ]["zensus_population_id"]
318
319
    # drop duplicated buildings
320
    df_amenities_in_buildings.drop_duplicates(
321
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
322
    )
323
324
    # check if lost zensus cells are already covered
325
    if not df_lost_cells.empty:
326
        if not (
327
            df_amenities_in_buildings["zensus_population_id"]
328
            .isin(df_lost_cells)
329
            .empty
330
        ):
331
            # query geom data for cell if not
332
            with db.session_scope() as session:
333
                cells_query = session.query(
334
                    DestatisZensusPopulationPerHa.id,
335
                    DestatisZensusPopulationPerHa.geom,
336
                ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
337
338
            df_lost_cells = gpd.read_postgis(
339
                cells_query.statement,
340
                cells_query.session.bind,
341
                geom_col="geom",
342
            )
343
            # TODO maybe adapt method
344
            # place random amenity in cell
345
            df_lost_cells["n_amenities_inside"] = 1
346
            df_lost_cells = place_buildings_with_amenities(
347
                df_lost_cells, amenities=1
348
            )
349
            df_lost_cells.rename(
350
                columns={
351
                    "id": "zensus_population_id",
352
                    "geom_point": "geom_amenity",
353
                },
354
                inplace=True,
355
            )
356
            df_lost_cells.drop(
357
                columns=["building_count", "n_amenities_inside"], inplace=True
358
            )
359
        else:
360
            df_lost_cells = None
361
    else:
362
        df_lost_cells = None
363
364
    # drop helper columns
365
    df_amenities_in_buildings.drop(
366
        columns=["duplicate_identifier"], inplace=True
367
    )
368
369
    # sum amenities per building and cell
370
    df_amenities_in_buildings[
371
        "n_amenities_inside"
372
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
373
        "n_amenities_inside"
374
    ].transform(
375
        "sum"
376
    )
377
    # drop duplicated buildings
378
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
379
        ["id", "zensus_population_id"]
380
    )
381
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
382
383
    df_buildings_with_amenities = df_buildings_with_amenities[
384
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
385
    ]
386
    df_buildings_with_amenities.rename(
387
        columns={
388
            # "zensus_population_id": "cell_id",
389
            "egon_building_id": "id"
390
        },
391
        inplace=True,
392
    )
393
394
    return df_buildings_with_amenities, df_lost_cells
395
396
397
# TODO Remove as depricated
398
def write_synthetic_buildings_to_db(df_synthetic_buildings):
399
    """"""
400
    if "geom_point" not in df_synthetic_buildings.columns:
401
        df_synthetic_buildings["geom_point"] = df_synthetic_buildings[
402
            "geom_building"
403
        ].centroid
404
405
    df_synthetic_buildings = df_synthetic_buildings.rename(
406
        columns={
407
            "zensus_population_id": "cell_id",
408
            "egon_building_id": "id",
409
        }
410
    )
411
    # Only take existing columns
412
    columns = [
413
        column.key for column in OsmBuildingsSynthetic.__table__.columns
414
    ]
415
    df_synthetic_buildings = df_synthetic_buildings.loc[:, columns]
416
417
    dtypes = {
418
        i: OsmBuildingsSynthetic.__table__.columns[i].type
419
        for i in OsmBuildingsSynthetic.__table__.columns.keys()
420
    }
421
422
    # Write new buildings incl coord into db
423
    df_synthetic_buildings.to_postgis(
424
        name=OsmBuildingsSynthetic.__tablename__,
425
        con=engine,
426
        if_exists="append",
427
        schema=OsmBuildingsSynthetic.__table_args__["schema"],
428
        dtype=dtypes,
429
    )
430
431
432
def buildings_without_amenities():
433
    """
434
    Buildings (filtered and synthetic) in cells with
435
    cts demand but no amenities are determined.
436
437
    Returns
438
    -------
439
    df_buildings_without_amenities: gpd.GeoDataFrame
440
        Table of buildings without amenities in zensus cells
441
        with cts demand.
442
    """
443
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
444
    from saio.openstreetmap import (
445
        osm_amenities_shops_filtered,
446
        osm_buildings_filtered,
447
        osm_buildings_synthetic,
448
    )
449
450
    # buildings_filtered in cts-demand-cells without amenities
451
    with db.session_scope() as session:
452
453
        # Synthetic Buildings
454
        q_synth_buildings = session.query(
455
            osm_buildings_synthetic.cell_id.cast(Integer).label(
456
                "zensus_population_id"
457
            ),
458
            osm_buildings_synthetic.id.cast(Integer).label("id"),
459
            osm_buildings_synthetic.area.label("area"),
460
            osm_buildings_synthetic.geom_building.label("geom_building"),
461
            osm_buildings_synthetic.geom_point.label("geom_point"),
462
        )
463
464
        # Buildings filtered
465
        q_buildings_filtered = session.query(
466
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
467
            osm_buildings_filtered.id,
468
            osm_buildings_filtered.area,
469
            osm_buildings_filtered.geom_building,
470
            osm_buildings_filtered.geom_point,
471
        ).filter(
472
            osm_buildings_filtered.id
473
            == egon_map_zensus_buildings_filtered_all.id
474
        )
475
476
        # Amenities + zensus_population_id
477
        q_amenities = (
478
            session.query(
479
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
480
            )
481
            .filter(
482
                func.st_within(
483
                    osm_amenities_shops_filtered.geom_amenity,
484
                    DestatisZensusPopulationPerHa.geom,
485
                )
486
            )
487
            .distinct(DestatisZensusPopulationPerHa.id)
488
        )
489
490
        # Cells with CTS demand but without amenities
491
        q_cts_without_amenities = (
492
            session.query(
493
                EgonDemandRegioZensusElectricity.zensus_population_id,
494
            )
495
            .filter(
496
                EgonDemandRegioZensusElectricity.sector == "service",
497
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
498
            )
499
            .filter(
500
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
501
                    q_amenities
502
                )
503
            )
504
            .distinct()
505
        )
506
507
        # Buildings filtered + synthetic buildings residential in
508
        # cells with CTS demand but without amenities
509
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
510
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
511
                q_cts_without_amenities
512
            )
513
        )
514
515
    # df_buildings_without_amenities = pd.read_sql(
516
    #     cells_query.statement, cells_query.session.bind, index_col=None)
517
    df_buildings_without_amenities = gpd.read_postgis(
518
        cells_query.statement,
519
        cells_query.session.bind,
520
        geom_col="geom_building",
521
    )
522
523
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
524
        columns={
525
            # "zensus_population_id": "cell_id",
526
            "egon_building_id": "id",
527
        }
528
    )
529
530
    return df_buildings_without_amenities
531
532
533
def select_cts_buildings(df_buildings_wo_amenities):
534
    """
535
    Buildings (filtered and synthetic) in cells with
536
    cts demand are selected. Only the first building
537
    is taken for each cell and 1 amenities is assigned.
538
539
    Returns
540
    -------
541
    df_buildings_with_cts_demand: gpd.GeoDataFrame
542
        Table of buildings
543
    """
544
    # TODO Adapt method
545
    # Select one building each cell
546
    # take the first
547
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
548
        # subset="cell_id", keep="first"
549
        subset="zensus_population_id",
550
        keep="first",
551
    ).reset_index(drop=True)
552
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
553
    df_buildings_with_cts_demand["building"] = "cts"
554
555
    return df_buildings_with_cts_demand
556
557
558
def cells_with_cts_demand_only(df_buildings_without_amenities):
559
    """
560
    Cells with cts demand but no amenities or buildilngs
561
    are determined.
562
563
    Returns
564
    -------
565
    df_cells_only_cts_demand: gpd.GeoDataFrame
566
        Table of cells with cts demand but no amenities or buildings
567
    """
568
    from saio.openstreetmap import osm_amenities_shops_filtered
569
570
    # cells mit amenities
571
    with db.session_scope() as session:
572
        sub_query = (
573
            session.query(
574
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
575
            )
576
            .filter(
577
                func.st_within(
578
                    osm_amenities_shops_filtered.geom_amenity,
579
                    DestatisZensusPopulationPerHa.geom,
580
                )
581
            )
582
            .distinct(DestatisZensusPopulationPerHa.id)
583
        )
584
585
        cells_query = (
586
            session.query(
587
                EgonDemandRegioZensusElectricity.zensus_population_id,
588
                EgonDemandRegioZensusElectricity.scenario,
589
                EgonDemandRegioZensusElectricity.sector,
590
                EgonDemandRegioZensusElectricity.demand,
591
                DestatisZensusPopulationPerHa.geom,
592
            )
593
            .filter(
594
                EgonDemandRegioZensusElectricity.sector == "service",
595
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
596
            )
597
            .filter(
598
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
599
                    sub_query
600
                )
601
            )
602
            .filter(
603
                EgonDemandRegioZensusElectricity.zensus_population_id
604
                == DestatisZensusPopulationPerHa.id
605
            )
606
        )
607
608
    df_cts_cell_without_amenities = gpd.read_postgis(
609
        cells_query.statement,
610
        cells_query.session.bind,
611
        geom_col="geom",
612
        index_col=None,
613
    )
614
615
    # TODO maybe remove
616
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
617
        columns={"cell_id": "zensus_population_id"}
618
    )
619
620
    # Census cells with only cts demand
621
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
622
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
623
            df_buildings_without_amenities["zensus_population_id"].unique()
624
        )
625
    ]
626
627
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
628
629
    return df_cells_only_cts_demand
630
631
632
def calc_census_cell_share(scenario="eGon2035"):
633
    """
634
    The profile share for each census cell is calculated by it's
635
    share of annual demand per substation bus. The annual demand
636
    per cell is defined by DemandRegio. The share is for both
637
    scenarios identical as the annual demand is linearly scaled.
638
639
    Parameters
640
    ----------
641
    scenario: str
642
        Scenario for which the share is calculated.
643
644
    Returns
645
    -------
646
    df_census_share: pd.DataFrame
647
    """
648
649
    with db.session_scope() as session:
650
        cells_query = (
651
            session.query(
652
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
653
            )
654
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
655
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
656
            .filter(
657
                EgonDemandRegioZensusElectricity.zensus_population_id
658
                == MapZensusGridDistricts.zensus_population_id
659
            )
660
        )
661
662
    df_demand_regio_electricity_demand = pd.read_sql(
663
        cells_query.statement,
664
        cells_query.session.bind,
665
        index_col="zensus_population_id",
666
    )
667
668
    # get demand share of cell per bus
669
    df_census_share = df_demand_regio_electricity_demand[
670
        "demand"
671
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
672
        "demand"
673
    ].transform(
674
        "sum"
675
    )
676
    df_census_share = df_census_share.rename("cell_share")
677
678
    df_census_share = pd.concat(
679
        [
680
            df_census_share,
681
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
682
        ],
683
        axis=1,
684
    )
685
686
    df_census_share.reset_index(inplace=True)
687
    return df_census_share
688
689
690
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
691
    """
692
    Share of cts electricity demand profile per bus for every selected building
693
    is calculated. Building-amenity share is multiplied with census cell share
694
    to get the substation bus profile share for each building. The share is
695
    grouped and aggregated per building as some cover multiple cells.
696
697
    Parameters
698
    ----------
699
    df_cts_buildings: gpd.GeoDataFrame
700
        Table of all buildings with cts demand assigned
701
    scenario: str
702
        Scenario for which the share is calculated.
703
704
    Returns
705
    -------
706
    df_building_share: pd.DataFrame
707
        Table of bus profile share per building
708
709
    """
710
711
    def calc_building_amenity_share(df_cts_buildings):
712
        """
713
        Calculate the building share by the number amenities per building
714
        within a census cell.
715
        """
716
        df_building_amenity_share = df_cts_buildings[
717
            "n_amenities_inside"
718
        ] / df_cts_buildings.groupby("zensus_population_id")[
719
            "n_amenities_inside"
720
        ].transform(
721
            "sum"
722
        )
723
        df_building_amenity_share = pd.concat(
724
            [
725
                df_building_amenity_share.rename("building_amenity_share"),
726
                df_cts_buildings[["zensus_population_id", "id"]],
727
            ],
728
            axis=1,
729
        )
730
        return df_building_amenity_share
731
732
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
733
734
    df_census_cell_share = calc_census_cell_share(scenario)
735
736
    df_demand_share = pd.merge(
737
        left=df_building_amenity_share,
738
        right=df_census_cell_share,
739
        left_on="zensus_population_id",
740
        right_on="zensus_population_id",
741
    )
742
    df_demand_share["profile_share"] = df_demand_share[
743
        "building_amenity_share"
744
    ].multiply(df_demand_share["cell_share"])
745
746
    df_demand_share = df_demand_share[
747
        ["id", "bus_id", "scenario", "profile_share"]
748
    ]
749
    # Group and aggregate per building for multi cell buildings
750
    df_demand_share = (
751
        df_demand_share.groupby(["scenario", "id", "bus_id"])
752
        .sum()
753
        .reset_index()
754
    )
755
756
    return df_demand_share
757
758
759
def calc_building_profiles(
760
    df_demand_share=None,
761
    egon_building_id=None,
762
    bus_id=None,
763
    scenario="eGon2035",
764
):
765
    """
766
    Calculate the demand profile for each building. The profile is
767
    calculated by the demand share of the building per substation bus.
768
769
    Parameters
770
    ----------
771
    df_demand_share: pd.DataFrame
772
        Table of demand share per building. If not given, table is
773
        sourced from database.
774
    egon_building_id: int
775
        Id of the building for which the profile is calculated. If not
776
        given, the profiles are calculated for all buildings.
777
778
    Returns
779
    -------
780
    df_building_profiles: pd.DataFrame
781
        Table of demand profile per building
782
    """
783
784
    if not isinstance(df_demand_share, pd.DataFrame):
785
        with db.session_scope() as session:
786
            cells_query = session.query(
787
                EgonCtsElectricityDemandBuildingShare,
788
            ).filter(
789
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
790
            )
791
792
        df_demand_share = pd.read_sql(
793
            cells_query.statement, cells_query.session.bind, index_col=None
794
        )
795
    else:
796
        df_demand_share = df_demand_share.loc[
797
            df_demand_share["scenario"] == scenario
798
        ]
799
800
    df_cts_profiles = calc_load_curves_cts(scenario)
801
802
    # get demand share of selected building id
803
    if isinstance(egon_building_id, int):
804
        if egon_building_id in df_demand_share["id"]:
805
            df_demand_share = df_demand_share.loc[
806
                df_demand_share["id"] == egon_building_id
807
            ]
808
        else:
809
            raise KeyError(f"Building with id {egon_building_id} not found")
810
811
    # get demand share of all buildings for selected bus id
812
    if isinstance(bus_id, int):
813
        if bus_id in df_demand_share["bus_id"]:
814
            df_demand_share = df_demand_share.loc[
815
                df_demand_share["bus_id"] == bus_id
816
            ]
817
        else:
818
            raise KeyError(f"Bus with id {bus_id} not found")
819
820
    # get demand profile for all buildings for selected demand share
821
    df_building_profiles = pd.DataFrame()
822
    for bus_id, df in df_demand_share.groupby("bus_id"):
823
        shares = df.set_index("id", drop=True)["profile_share"]
824
        profile = df_cts_profiles.loc[:, bus_id]
825
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 822 is not entered. Are you sure this can never be the case?
Loading history...
826
        df_building_profiles = pd.concat(
827
            [df_building_profiles, building_profiles], axis=1
828
        )
829
830
    return df_building_profiles
831
832
833
def cts_to_buildings():
834
    """
835
    Assigns CTS demand to buildings and calculates the respective demand
836
    profiles. The demand profile per substation are disaggregated per
837
    annual demand share of each census cell and by the number of amenities
838
    per building within the cell. If no building data is available,
839
    synthetic buildings are generated around the amenities. If no amenities
840
    but cts demand is available, buildings are randomly selected. If no
841
    building nor amenity is available, random synthetic buildings are
842
    generated. The demand share is stored in the database.
843
844
    Note:
845
    -----
846
    Cells with CTS demand, amenities and buildings do not change within
847
    the scenarios, only the demand itself. Therefore scenario eGon2035
848
    can be used universally to determine the cts buildings but not for
849
    he demand share.
850
    """
851
852
    # Buildings with amenities
853
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
854
855
    # Remove synthetic CTS buildings if existing
856
    delete_synthetic_cts_buildings()
857
858
    # Create synthetic buildings for amenites without buildings
859
    df_amenities_without_buildings = amenities_without_buildings()
860
    if df_lost_cells.empty:
861
        df_amenities_without_buildings = df_amenities_without_buildings.append(
862
            df_lost_cells, ignore_index=True
863
        )
864
865
    df_amenities_without_buildings["n_amenities_inside"] = 1
866
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
867
        df_amenities_without_buildings, points="geom_amenity"
868
    )
869
870
    # TODO write to DB and remove renaming
871
    # write_synthetic_buildings_to_db(df_synthetic_buildings_with_amenities)
872
    write_table_to_postgis(
873
        df_synthetic_buildings_with_amenities.rename(
874
            columns={
875
                "zensus_population_id": "cell_id",
876
                "egon_building_id": "id",
877
            }
878
        ),
879
        OsmBuildingsSynthetic,
880
        drop=False,
881
    )
882
883
    # Cells without amenities but CTS demand and buildings
884
    df_buildings_without_amenities = buildings_without_amenities()
885
886
    # TODO Fix Adhoc Bugfix duplicated buildings
887
    mask = df_buildings_without_amenities.loc[
888
        df_buildings_without_amenities["id"].isin(
889
            df_buildings_with_amenities["id"]
890
        )
891
    ].index
892
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
893
        index=mask
894
    ).reset_index(drop=True)
895
896
    df_buildings_without_amenities = select_cts_buildings(
897
        df_buildings_without_amenities
898
    )
899
    df_buildings_without_amenities["n_amenities_inside"] = 1
900
901
    # Create synthetic amenities and buildings in cells with only CTS demand
902
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
903
        df_buildings_without_amenities
904
    )
905
    # Only 1 Amenity per cell
906
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
907
    # Only 1 Amenity per Building
908
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
909
        df_cells_with_cts_demand_only, amenities=1
910
    )
911
    # Leads to only 1 building per cell
912
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
913
        df_cells_with_cts_demand_only, points="geom_point"
914
    )
915
916
    # TODO write to DB and remove renaming
917
    # write_synthetic_buildings_to_db(df_synthetic_buildings_without_amenities)
918
    write_table_to_postgis(
919
        df_synthetic_buildings_without_amenities.rename(
920
            columns={
921
                "zensus_population_id": "cell_id",
922
                "egon_building_id": "id",
923
            }
924
        ),
925
        OsmBuildingsSynthetic,
926
        drop=False,
927
    )
928
929
    # Concat all buildings
930
    columns = [
931
        "zensus_population_id",
932
        "id",
933
        "geom_building",
934
        "n_amenities_inside",
935
        "source",
936
    ]
937
938
    df_buildings_with_amenities["source"] = "bwa"
939
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
940
    df_buildings_without_amenities["source"] = "bwoa"
941
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
942
943
    df_cts_buildings = pd.concat(
944
        [
945
            df_buildings_with_amenities[columns],
946
            df_synthetic_buildings_with_amenities[columns],
947
            df_buildings_without_amenities[columns],
948
            df_synthetic_buildings_without_amenities[columns],
949
        ],
950
        axis=0,
951
        ignore_index=True,
952
    )
953
    # TODO maybe remove after #772
954
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
955
956
    # Write table to db for debugging
957
    # TODO remove later
958
    df_cts_buildings = gpd.GeoDataFrame(
959
        df_cts_buildings, geometry="geom_building", crs=3035
960
    )
961
    df_cts_buildings = df_cts_buildings.reset_index().rename(
962
        columns={"index": "serial"}
963
    )
964
    write_table_to_postgis(
965
        df_cts_buildings,
966
        CtsBuildings,
967
        drop=True,
968
    )
969
970
    df_demand_share_2035 = calc_building_demand_profile_share(
971
        df_cts_buildings, scenario="eGon2035"
972
    )
973
    df_demand_share_100RE = calc_building_demand_profile_share(
974
        df_cts_buildings, scenario="eGon100RE"
975
    )
976
977
    df_demand_share = pd.concat(
978
        [df_demand_share_2035, df_demand_share_100RE],
979
        axis=0,
980
        ignore_index=True,
981
    )
982
983
    write_table_to_postgres(
984
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
985
    )
986
987
    return df_cts_buildings, df_demand_share
988
989
990
def get_peak_load_cts_buildings():
991
    """
992
    Get peak load of all CTS buildings for both scenarios and store in DB.
993
    """
994
995
    # TODO Check units, maybe MwH?
996
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
997
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename("eGon2035")
998
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
999
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename("eGon100RE")
1000
    df_peak_load = pd.concat(
1001
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1002
    ).reset_index()
1003
1004
    df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
1005
    df_peak_load["type"] = "cts"
1006
    df_building_peak_loads = df_peak_load.melt(
1007
        id_vars=["building_id", "type"],
1008
        var_name="scenario",
1009
        value_name="peak_demand_in_w",
1010
    )
1011
1012
    # Delete rows with cts demand
1013
    with db.session_scope() as session:
1014
        session.query(BuildingPeakLoads).filter(
1015
            BuildingPeakLoads.type == "cts"
1016
        ).delete()
1017
1018
    # Write peak loads into db
1019
    with db.session_scope() as session:
1020
        session.bulk_insert_mappings(
1021
            BuildingPeakLoads,
1022
            df_peak_load.to_dict(orient="records"),
1023
        )
1024
1025
1026
def delete_synthetic_cts_buildings():
1027
    """
1028
    All synthetic cts buildings are deleted from the DB. This is necessary if
1029
    the task is run multiple times as the existing synthetic buildings
1030
    influence the results.
1031
    """
1032
    # import db tables
1033
    from saio.openstreetmap import osm_buildings_synthetic
1034
1035
    # cells mit amenities
1036
    with db.session_scope() as session:
1037
        session.query(osm_buildings_synthetic).filter(
1038
            osm_buildings_synthetic.building == "cts"
1039
        ).delete()
1040
1041
1042
class CtsElectricityBuildings(Dataset):
1043
    def __init__(self, dependencies):
1044
        super().__init__(
1045
            name="CtsElectricityBuildings",
1046
            version="0.0.0.",
1047
            dependencies=dependencies,
1048
            tasks=(
1049
                cts_to_buildings,
1050
                get_peak_load_cts_buildings,
1051
                # get_all_cts_building_profiles,
1052
            ),
1053
        )
1054