Passed
Pull Request — dev (#826)
by
unknown
02:03
created

data.datasets.electricity_demand_timeseries.cts_buildings   A

Complexity

Total Complexity 42

Size/Duplication

Total Lines 1046
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 42
eloc 531
dl 0
loc 1046
rs 9.0399
c 0
b 0
f 0

14 Functions

Rating   Name   Duplication   Size   Complexity  
A delete_synthetic_cts_buildings() 0 14 2
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A get_peak_load_cts_buildings() 0 31 3
C calc_building_profiles() 0 72 9
A amenities_without_buildings() 0 60 2
A select_cts_buildings() 0 23 1
B cells_with_cts_demand_only() 0 72 2
A write_synthetic_buildings_to_db() 0 31 2
B cts_to_buildings() 0 155 2
A calc_building_demand_profile_share() 0 67 1
B buildings_with_amenities() 0 153 4
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    BuildingPeakLoads,
17
    OsmBuildingsSynthetic,
18
)
19
from egon.data.datasets.electricity_demand_timeseries.tools import (
20
    random_ints_until_sum,
21
    random_point_in_square,
22
    specific_int_until_sum,
23
    write_table_to_postgis,
24
    write_table_to_postgres,
25
)
26
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
27
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
28
import egon.data.config
29
30
engine = db.engine()
31
Base = declarative_base()
32
33
data_config = egon.data.config.datasets()
34
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
35
36
# import db tables
37
saio.register_schema("openstreetmap", engine=engine)
38
saio.register_schema("society", engine=engine)
39
saio.register_schema("demand", engine=engine)
40
saio.register_schema("boundaries", engine=engine)
41
42
43
class EgonCtsElectricityDemandBuildingShare(Base):
44
    __tablename__ = "egon_cts_electricity_demand_building_share"
45
    __table_args__ = {"schema": "demand"}
46
47
    id = Column(Integer, primary_key=True)
48
    scenario = Column(String, primary_key=True)
49
    bus_id = Column(Integer, index=True)
50
    profile_share = Column(Float)
51
52
53
class CtsBuildings(Base):
54
    __tablename__ = "egon_cts_buildings"
55
    __table_args__ = {"schema": "openstreetmap"}
56
57
    serial = Column(Integer, primary_key=True)
58
    id = Column(Integer)
59
    zensus_population_id = Column(Integer, index=True)
60
    geom_building = Column(Geometry("Polygon", 3035))
61
    n_amenities_inside = Column(Integer)
62
    source = Column(String)
63
64
65
def amenities_without_buildings():
66
    """
67
    Amenities which have no buildings assigned and are in
68
    a cell with cts demand are determined.
69
70
    Returns
71
    -------
72
    pd.DataFrame
73
        Table of amenities without buildings
74
    """
75
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
76
77
    with db.session_scope() as session:
78
        cells_query = (
79
            session.query(
80
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
81
                # TODO can be used for square around amenity
82
                #  (1 geom_amenity: 1 geom_building)
83
                #  not unique amenity_ids yet
84
                osm_amenities_not_in_buildings_filtered.geom_amenity,
85
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
86
                # EgonDemandRegioZensusElectricity.demand,
87
                # # TODO can be used to generate n random buildings
88
                # # (n amenities : 1 randombuilding)
89
                # func.count(
90
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
91
                # ).label("n_amenities_inside"),
92
                # DestatisZensusPopulationPerHa.geom,
93
            )
94
            .filter(
95
                func.st_within(
96
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
97
                    DestatisZensusPopulationPerHa.geom,
98
                )
99
            )
100
            .filter(
101
                DestatisZensusPopulationPerHa.id
102
                == EgonDemandRegioZensusElectricity.zensus_population_id
103
            )
104
            .filter(
105
                EgonDemandRegioZensusElectricity.sector == "service",
106
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
107
                #         ).group_by(
108
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
109
                #             DestatisZensusPopulationPerHa.geom,
110
            )
111
        )
112
    # # TODO can be used to generate n random buildings
113
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
114
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
115
    # )
116
    #
117
118
    # # TODO can be used for square around amenity
119
    df_amenities_without_buildings = gpd.read_postgis(
120
        cells_query.statement,
121
        cells_query.session.bind,
122
        geom_col="geom_amenity",
123
    )
124
    return df_amenities_without_buildings
125
126
127
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
128
    """
129
    Building centroids are placed randomly within census cells.
130
    The Number of buildings is derived from n_amenity_inside, the selected
131
    method and number of amenities per building.
132
133
    Returns
134
    -------
135
    df: gpd.GeoDataFrame
136
        Table of buildings centroids
137
    """
138
    if isinstance(max_amenities, int):
139
        # amount of amenities is randomly generated within bounds
140
        # (max_amenities, amenities per cell)
141
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
142
            random_ints_until_sum, args=[max_amenities]
143
        )
144
    if isinstance(amenities, int):
145
        # Specific amount of amenities per building
146
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
147
            specific_int_until_sum, args=[amenities]
148
        )
149
150
    # Unnest each building
151
    df = df.explode(column="n_amenities_inside")
152
153
    # building count per cell
154
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
155
156
    # generate random synthetic buildings
157
    edge_length = 5
158
    # create random points within census cells
159
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
160
161
    df.reset_index(drop=True, inplace=True)
162
    # Store center of polygon
163
    df["geom_point"] = points
164
    # Drop geometry of census cell
165
    df = df.drop(columns=["geom"])
166
167
    return df
168
169
170
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
171
    """
172
    Synthetic buildings are generated around points.
173
174
    Parameters
175
    ----------
176
    df: pd.DataFrame
177
        Table of census cells
178
    points: gpd.GeoSeries or str
179
        List of points to place buildings around or column name of df
180
    crs: str
181
        CRS of result table
182
183
    Returns
184
    -------
185
    df: gpd.GeoDataFrame
186
        Synthetic buildings
187
    """
188
189
    if isinstance(points, str) and points in df.columns:
190
        points = df[points]
191
    elif isinstance(points, gpd.GeoSeries):
192
        pass
193
    else:
194
        raise ValueError("Points are of the wrong type")
195
196
    # Create building using a square around point
197
    edge_length = 5
198
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
199
200
    if "geom_point" not in df.columns:
201
        df["geom_point"] = df["geom_building"].centroid
202
203
    # TODO Check CRS
204
    df = gpd.GeoDataFrame(
205
        df,
206
        crs=crs,
207
        geometry="geom_building",
208
    )
209
210
    # TODO remove after implementation of egon_building_id
211
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
212
213
    # get max number of building ids from synthetic residential table
214
    with db.session_scope() as session:
215
        max_synth_residential_id = session.execute(
216
            func.max(OsmBuildingsSynthetic.id)
217
        ).scalar()
218
    max_synth_residential_id = int(max_synth_residential_id)
219
220
    # create sequential ids
221
    df["egon_building_id"] = range(
222
        max_synth_residential_id + 1,
223
        max_synth_residential_id + df.shape[0] + 1,
224
    )
225
226
    df["area"] = df["geom_building"].area
227
    # set building type of synthetic building
228
    df["building"] = "cts"
229
    # TODO remove in #772
230
    df = df.rename(
231
        columns={
232
            # "zensus_population_id": "cell_id",
233
            "egon_building_id": "id",
234
        }
235
    )
236
    return df
237
238
239
def buildings_with_amenities():
240
    """
241
    Amenities which are assigned to buildings are determined
242
    and grouped per building and zensus cell. Buildings
243
    covering multiple cells therefore exists multiple times
244
    but in different zensus cells. This is necessary to cover
245
    all cells with a cts demand. If buildings exist in multiple
246
    substations, their amenities are summed and assigned and kept in
247
    one substation only. If as a result, a census cell is uncovered,
248
    a synthetic amenity is placed. The buildings are aggregated
249
    afterwards during the calculation of the profile_share.
250
251
    Returns
252
    -------
253
    df_buildings_with_amenities: gpd.GeoDataFrame
254
        Contains all buildings with amenities per zensus cell.
255
    df_lost_cells: gpd.GeoDataFrame
256
        Contains synthetic amenities in lost cells. Might be empty
257
    """
258
259
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
260
261
    with db.session_scope() as session:
262
        cells_query = (
263
            session.query(
264
                osm_amenities_in_buildings_filtered,
265
                MapZensusGridDistricts.bus_id,
266
            )
267
            .filter(
268
                MapZensusGridDistricts.zensus_population_id
269
                == osm_amenities_in_buildings_filtered.zensus_population_id
270
            )
271
            .filter(
272
                EgonDemandRegioZensusElectricity.zensus_population_id
273
                == osm_amenities_in_buildings_filtered.zensus_population_id
274
            )
275
            .filter(
276
                EgonDemandRegioZensusElectricity.sector == "service",
277
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
278
            )
279
        )
280
    df_amenities_in_buildings = pd.read_sql(
281
        cells_query.statement, cells_query.session.bind, index_col=None
282
    )
283
284
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
285
        "geom_building"
286
    ].apply(to_shape)
287
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
288
        "geom_amenity"
289
    ].apply(to_shape)
290
291
    df_amenities_in_buildings["n_amenities_inside"] = 1
292
293
    # add identifier column for buildings in multiple substations
294
    df_amenities_in_buildings[
295
        "duplicate_identifier"
296
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
297
        "n_amenities_inside"
298
    ].transform(
299
        "cumsum"
300
    )
301
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
302
        ["id", "duplicate_identifier"]
303
    )
304
    # sum amenities of buildings with multiple substations
305
    df_amenities_in_buildings[
306
        "n_amenities_inside"
307
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
308
        "n_amenities_inside"
309
    ].transform(
310
        "sum"
311
    )
312
    # identify lost zensus cells
313
    df_lost_cells = df_amenities_in_buildings.loc[
314
        df_amenities_in_buildings.duplicated(
315
            subset=["id", "duplicate_identifier"], keep="first"
316
        )
317
    ]["zensus_population_id"]
318
319
    # drop duplicated buildings
320
    df_amenities_in_buildings.drop_duplicates(
321
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
322
    )
323
324
    # check if lost zensus cells are already covered
325
    if not (
326
        df_amenities_in_buildings["zensus_population_id"]
327
        .isin(df_lost_cells)
328
        .any()
329
    ):
330
        # query geom data for cell if not
331
        with db.session_scope() as session:
332
            cells_query = session.query(
333
                DestatisZensusPopulationPerHa.id,
334
                DestatisZensusPopulationPerHa.geom,
335
            ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
336
337
        df_lost_cells = gpd.read_postgis(
338
            cells_query.statement,
339
            cells_query.session.bind,
340
            geom_col="geom",
341
        )
342
        # TODO maybe adapt method
343
        # place random amenity in cell
344
        df_lost_cells["n_amenities_inside"] = 1
345
        df_lost_cells = place_buildings_with_amenities(
346
            df_lost_cells, amenities=1
347
        )
348
        df_lost_cells.rename(
349
            columns={
350
                "id": "zensus_population_id",
351
                "geom_point": "geom_amenity",
352
            },
353
            inplace=True,
354
        )
355
        df_lost_cells.drop(
356
            columns=["building_count", "n_amenities_inside"], inplace=True
357
        )
358
    else:
359
        df_lost_cells = None
360
361
    # drop helper columns
362
    df_amenities_in_buildings.drop(
363
        columns=["duplicate_identifier"], inplace=True
364
    )
365
366
    # sum amenities per building and cell
367
    df_amenities_in_buildings[
368
        "n_amenities_inside"
369
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
370
        "n_amenities_inside"
371
    ].transform(
372
        "sum"
373
    )
374
    # drop duplicated buildings
375
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
376
        ["id", "zensus_population_id"]
377
    )
378
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
379
380
    df_buildings_with_amenities = df_buildings_with_amenities[
381
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
382
    ]
383
    df_buildings_with_amenities.rename(
384
        columns={
385
            # "zensus_population_id": "cell_id",
386
            "egon_building_id": "id"
387
        },
388
        inplace=True,
389
    )
390
391
    return df_buildings_with_amenities, df_lost_cells
392
393
394
# TODO Remove as depricated
395
def write_synthetic_buildings_to_db(df_synthetic_buildings):
396
    """"""
397
    if "geom_point" not in df_synthetic_buildings.columns:
398
        df_synthetic_buildings["geom_point"] = df_synthetic_buildings[
399
            "geom_building"
400
        ].centroid
401
402
    df_synthetic_buildings = df_synthetic_buildings.rename(
403
        columns={
404
            "zensus_population_id": "cell_id",
405
            "egon_building_id": "id",
406
        }
407
    )
408
    # Only take existing columns
409
    columns = [
410
        column.key for column in OsmBuildingsSynthetic.__table__.columns
411
    ]
412
    df_synthetic_buildings = df_synthetic_buildings.loc[:, columns]
413
414
    dtypes = {
415
        i: OsmBuildingsSynthetic.__table__.columns[i].type
416
        for i in OsmBuildingsSynthetic.__table__.columns.keys()
417
    }
418
419
    # Write new buildings incl coord into db
420
    df_synthetic_buildings.to_postgis(
421
        name=OsmBuildingsSynthetic.__tablename__,
422
        con=engine,
423
        if_exists="append",
424
        schema=OsmBuildingsSynthetic.__table_args__["schema"],
425
        dtype=dtypes,
426
    )
427
428
429
def buildings_without_amenities():
430
    """
431
    Buildings (filtered and synthetic) in cells with
432
    cts demand but no amenities are determined.
433
434
    Returns
435
    -------
436
    df_buildings_without_amenities: gpd.GeoDataFrame
437
        Table of buildings without amenities in zensus cells
438
        with cts demand.
439
    """
440
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
441
    from saio.openstreetmap import (
442
        osm_amenities_shops_filtered,
443
        osm_buildings_filtered,
444
        osm_buildings_synthetic,
445
    )
446
447
    # buildings_filtered in cts-demand-cells without amenities
448
    with db.session_scope() as session:
449
450
        # Synthetic Buildings
451
        q_synth_buildings = session.query(
452
            osm_buildings_synthetic.cell_id.cast(Integer).label(
453
                "zensus_population_id"
454
            ),
455
            osm_buildings_synthetic.id.cast(Integer).label("id"),
456
            osm_buildings_synthetic.area.label("area"),
457
            osm_buildings_synthetic.geom_building.label("geom_building"),
458
            osm_buildings_synthetic.geom_point.label("geom_point"),
459
        )
460
461
        # Buildings filtered
462
        q_buildings_filtered = session.query(
463
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
464
            osm_buildings_filtered.id,
465
            osm_buildings_filtered.area,
466
            osm_buildings_filtered.geom_building,
467
            osm_buildings_filtered.geom_point,
468
        ).filter(
469
            osm_buildings_filtered.id
470
            == egon_map_zensus_buildings_filtered_all.id
471
        )
472
473
        # Amenities + zensus_population_id
474
        q_amenities = (
475
            session.query(
476
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
477
            )
478
            .filter(
479
                func.st_within(
480
                    osm_amenities_shops_filtered.geom_amenity,
481
                    DestatisZensusPopulationPerHa.geom,
482
                )
483
            )
484
            .distinct(DestatisZensusPopulationPerHa.id)
485
        )
486
487
        # Cells with CTS demand but without amenities
488
        q_cts_without_amenities = (
489
            session.query(
490
                EgonDemandRegioZensusElectricity.zensus_population_id,
491
            )
492
            .filter(
493
                EgonDemandRegioZensusElectricity.sector == "service",
494
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
495
            )
496
            .filter(
497
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
498
                    q_amenities
499
                )
500
            )
501
            .distinct()
502
        )
503
504
        # Buildings filtered + synthetic buildings residential in
505
        # cells with CTS demand but without amenities
506
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
507
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
508
                q_cts_without_amenities
509
            )
510
        )
511
512
    # df_buildings_without_amenities = pd.read_sql(
513
    #     cells_query.statement, cells_query.session.bind, index_col=None)
514
    df_buildings_without_amenities = gpd.read_postgis(
515
        cells_query.statement,
516
        cells_query.session.bind,
517
        geom_col="geom_building",
518
    )
519
520
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
521
        columns={
522
            # "zensus_population_id": "cell_id",
523
            "egon_building_id": "id",
524
        }
525
    )
526
527
    return df_buildings_without_amenities
528
529
530
def select_cts_buildings(df_buildings_wo_amenities):
531
    """
532
    Buildings (filtered and synthetic) in cells with
533
    cts demand are selected. Only the first building
534
    is taken for each cell and 1 amenities is assigned.
535
536
    Returns
537
    -------
538
    df_buildings_with_cts_demand: gpd.GeoDataFrame
539
        Table of buildings
540
    """
541
    # TODO Adapt method
542
    # Select one building each cell
543
    # take the first
544
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
545
        # subset="cell_id", keep="first"
546
        subset="zensus_population_id",
547
        keep="first",
548
    ).reset_index(drop=True)
549
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
550
    df_buildings_with_cts_demand["building"] = "cts"
551
552
    return df_buildings_with_cts_demand
553
554
555
def cells_with_cts_demand_only(df_buildings_without_amenities):
556
    """
557
    Cells with cts demand but no amenities or buildilngs
558
    are determined.
559
560
    Returns
561
    -------
562
    df_cells_only_cts_demand: gpd.GeoDataFrame
563
        Table of cells with cts demand but no amenities or buildings
564
    """
565
    from saio.openstreetmap import osm_amenities_shops_filtered
566
567
    # cells mit amenities
568
    with db.session_scope() as session:
569
        sub_query = (
570
            session.query(
571
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
572
            )
573
            .filter(
574
                func.st_within(
575
                    osm_amenities_shops_filtered.geom_amenity,
576
                    DestatisZensusPopulationPerHa.geom,
577
                )
578
            )
579
            .distinct(DestatisZensusPopulationPerHa.id)
580
        )
581
582
        cells_query = (
583
            session.query(
584
                EgonDemandRegioZensusElectricity.zensus_population_id,
585
                EgonDemandRegioZensusElectricity.scenario,
586
                EgonDemandRegioZensusElectricity.sector,
587
                EgonDemandRegioZensusElectricity.demand,
588
                DestatisZensusPopulationPerHa.geom,
589
            )
590
            .filter(
591
                EgonDemandRegioZensusElectricity.sector == "service",
592
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
593
            )
594
            .filter(
595
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
596
                    sub_query
597
                )
598
            )
599
            .filter(
600
                EgonDemandRegioZensusElectricity.zensus_population_id
601
                == DestatisZensusPopulationPerHa.id
602
            )
603
        )
604
605
    df_cts_cell_without_amenities = gpd.read_postgis(
606
        cells_query.statement,
607
        cells_query.session.bind,
608
        geom_col="geom",
609
        index_col=None,
610
    )
611
612
    # TODO maybe remove
613
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
614
        columns={"cell_id": "zensus_population_id"}
615
    )
616
617
    # Census cells with only cts demand
618
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
619
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
620
            df_buildings_without_amenities["zensus_population_id"].unique()
621
        )
622
    ]
623
624
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
625
626
    return df_cells_only_cts_demand
627
628
629
def calc_census_cell_share(scenario="eGon2035"):
630
    """
631
    The profile share for each census cell is calculated by it's
632
    share of annual demand per substation bus. The annual demand
633
    per cell is defined by DemandRegio. The share is for both
634
    scenarios identical as the annual demand is linearly scaled.
635
636
    Parameters
637
    ----------
638
    scenario: str
639
        Scenario for which the share is calculated.
640
641
    Returns
642
    -------
643
    df_census_share: pd.DataFrame
644
    """
645
646
    with db.session_scope() as session:
647
        cells_query = (
648
            session.query(
649
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
650
            )
651
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
652
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
653
            .filter(
654
                EgonDemandRegioZensusElectricity.zensus_population_id
655
                == MapZensusGridDistricts.zensus_population_id
656
            )
657
        )
658
659
    df_demand_regio_electricity_demand = pd.read_sql(
660
        cells_query.statement,
661
        cells_query.session.bind,
662
        index_col="zensus_population_id",
663
    )
664
665
    # get demand share of cell per bus
666
    df_census_share = df_demand_regio_electricity_demand[
667
        "demand"
668
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
669
        "demand"
670
    ].transform(
671
        "sum"
672
    )
673
    df_census_share = df_census_share.rename("cell_share")
674
675
    df_census_share = pd.concat(
676
        [
677
            df_census_share,
678
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
679
        ],
680
        axis=1,
681
    )
682
683
    df_census_share.reset_index(inplace=True)
684
    return df_census_share
685
686
687
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
688
    """
689
    Share of cts electricity demand profile per bus for every selected building
690
    is calculated. Building-amenity share is multiplied with census cell share
691
    to get the substation bus profile share for each building. The share is
692
    grouped and aggregated per building as some cover multiple cells.
693
694
    Parameters
695
    ----------
696
    df_cts_buildings: gpd.GeoDataFrame
697
        Table of all buildings with cts demand assigned
698
    scenario: str
699
        Scenario for which the share is calculated.
700
701
    Returns
702
    -------
703
    df_building_share: pd.DataFrame
704
        Table of bus profile share per building
705
706
    """
707
708
    def calc_building_amenity_share(df_cts_buildings):
709
        """
710
        Calculate the building share by the number amenities per building
711
        within a census cell.
712
        """
713
        df_building_amenity_share = df_cts_buildings[
714
            "n_amenities_inside"
715
        ] / df_cts_buildings.groupby("zensus_population_id")[
716
            "n_amenities_inside"
717
        ].transform(
718
            "sum"
719
        )
720
        df_building_amenity_share = pd.concat(
721
            [
722
                df_building_amenity_share.rename("building_amenity_share"),
723
                df_cts_buildings[["zensus_population_id", "id"]],
724
            ],
725
            axis=1,
726
        )
727
        return df_building_amenity_share
728
729
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
730
731
    df_census_cell_share = calc_census_cell_share(scenario)
732
733
    df_demand_share = pd.merge(
734
        left=df_building_amenity_share,
735
        right=df_census_cell_share,
736
        left_on="zensus_population_id",
737
        right_on="zensus_population_id",
738
    )
739
    df_demand_share["profile_share"] = df_demand_share[
740
        "building_amenity_share"
741
    ].multiply(df_demand_share["cell_share"])
742
743
    df_demand_share = df_demand_share[
744
        ["id", "bus_id", "scenario", "profile_share"]
745
    ]
746
    # Group and aggregate per building for multi cell buildings
747
    df_demand_share = (
748
        df_demand_share.groupby(["scenario", "id", "bus_id"])
749
        .sum()
750
        .reset_index()
751
    )
752
753
    return df_demand_share
754
755
756
def calc_building_profiles(
757
    df_demand_share=None,
758
    egon_building_id=None,
759
    bus_id=None,
760
    scenario="eGon2035",
761
):
762
    """
763
    Calculate the demand profile for each building. The profile is
764
    calculated by the demand share of the building per substation bus.
765
766
    Parameters
767
    ----------
768
    df_demand_share: pd.DataFrame
769
        Table of demand share per building. If not given, table is
770
        sourced from database.
771
    egon_building_id: int
772
        Id of the building for which the profile is calculated. If not
773
        given, the profiles are calculated for all buildings.
774
775
    Returns
776
    -------
777
    df_building_profiles: pd.DataFrame
778
        Table of demand profile per building
779
    """
780
781
    if not isinstance(df_demand_share, pd.DataFrame):
782
        with db.session_scope() as session:
783
            cells_query = session.query(
784
                EgonCtsElectricityDemandBuildingShare,
785
            ).filter(
786
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
787
            )
788
789
        df_demand_share = pd.read_sql(
790
            cells_query.statement, cells_query.session.bind, index_col=None
791
        )
792
    else:
793
        df_demand_share = df_demand_share.loc[
794
            df_demand_share["scenario"] == scenario
795
        ]
796
797
    df_cts_profiles = calc_load_curves_cts(scenario)
798
799
    # get demand share of selected building id
800
    if isinstance(egon_building_id, int):
801
        if egon_building_id in df_demand_share["id"]:
802
            df_demand_share = df_demand_share.loc[
803
                df_demand_share["id"] == egon_building_id
804
            ]
805
        else:
806
            raise KeyError(f"Building with id {egon_building_id} not found")
807
808
    # get demand share of all buildings for selected bus id
809
    if isinstance(bus_id, int):
810
        if bus_id in df_demand_share["bus_id"]:
811
            df_demand_share = df_demand_share.loc[
812
                df_demand_share["bus_id"] == bus_id
813
            ]
814
        else:
815
            raise KeyError(f"Bus with id {bus_id} not found")
816
817
    # get demand profile for all buildings for selected demand share
818
    df_building_profiles = pd.DataFrame()
819
    for bus_id, df in df_demand_share.groupby("bus_id"):
820
        shares = df.set_index("id", drop=True)["profile_share"]
821
        profile = df_cts_profiles.loc[:, bus_id]
822
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 819 is not entered. Are you sure this can never be the case?
Loading history...
823
        df_building_profiles = pd.concat(
824
            [df_building_profiles, building_profiles], axis=1
825
        )
826
827
    return df_building_profiles
828
829
830
def cts_to_buildings():
831
    """
832
    Assigns CTS demand to buildings and calculates the respective demand
833
    profiles. The demand profile per substation are disaggregated per
834
    annual demand share of each census cell and by the number of amenities
835
    per building within the cell. If no building data is available,
836
    synthetic buildings are generated around the amenities. If no amenities
837
    but cts demand is available, buildings are randomly selected. If no
838
    building nor amenity is available, random synthetic buildings are
839
    generated. The demand share is stored in the database.
840
841
    Note:
842
    -----
843
    Cells with CTS demand, amenities and buildings do not change within
844
    the scenarios, only the demand itself. Therefore scenario eGon2035
845
    can be used universally to determine the cts buildings but not for
846
    he demand share.
847
    """
848
849
    # Buildings with amenities
850
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
851
852
    # Remove synthetic CTS buildings if existing
853
    delete_synthetic_cts_buildings()
854
855
    # Create synthetic buildings for amenites without buildings
856
    df_amenities_without_buildings = amenities_without_buildings()
857
    if df_lost_cells:
858
        df_amenities_without_buildings = df_amenities_without_buildings.append(
859
            df_lost_cells, ignore_index=True
860
        )
861
862
    df_amenities_without_buildings["n_amenities_inside"] = 1
863
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
864
        df_amenities_without_buildings, points="geom_amenity"
865
    )
866
867
    # TODO write to DB and remove renaming
868
    # write_synthetic_buildings_to_db(df_synthetic_buildings_with_amenities)
869
    write_table_to_postgis(
870
        df_synthetic_buildings_with_amenities.rename(
871
            columns={
872
                "zensus_population_id": "cell_id",
873
                "egon_building_id": "id",
874
            }
875
        ),
876
        OsmBuildingsSynthetic,
877
        drop=False,
878
    )
879
880
    # Cells without amenities but CTS demand and buildings
881
    df_buildings_without_amenities = buildings_without_amenities()
882
883
    # TODO Fix Adhoc Bugfix duplicated buildings
884
    mask = df_buildings_without_amenities.loc[
885
        df_buildings_without_amenities["id"].isin(
886
            df_buildings_with_amenities["id"]
887
        )
888
    ].index
889
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
890
        index=mask
891
    ).reset_index(drop=True)
892
893
    df_buildings_without_amenities = select_cts_buildings(
894
        df_buildings_without_amenities
895
    )
896
    df_buildings_without_amenities["n_amenities_inside"] = 1
897
898
    # Create synthetic amenities and buildings in cells with only CTS demand
899
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
900
        df_buildings_without_amenities
901
    )
902
    # Only 1 Amenity per cell
903
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
904
    # Only 1 Amenity per Building
905
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
906
        df_cells_with_cts_demand_only, amenities=1
907
    )
908
    # Leads to only 1 building per cell
909
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
910
        df_cells_with_cts_demand_only, points="geom_point"
911
    )
912
913
    # TODO write to DB and remove renaming
914
    # write_synthetic_buildings_to_db(df_synthetic_buildings_without_amenities)
915
    write_table_to_postgis(
916
        df_synthetic_buildings_without_amenities.rename(
917
            columns={
918
                "zensus_population_id": "cell_id",
919
                "egon_building_id": "id",
920
            }
921
        ),
922
        OsmBuildingsSynthetic,
923
        drop=False,
924
    )
925
926
    # Concat all buildings
927
    columns = [
928
        "zensus_population_id",
929
        "id",
930
        "geom_building",
931
        "n_amenities_inside",
932
        "source",
933
    ]
934
935
    df_buildings_with_amenities["source"] = "bwa"
936
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
937
    df_buildings_without_amenities["source"] = "bwoa"
938
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
939
940
    df_cts_buildings = pd.concat(
941
        [
942
            df_buildings_with_amenities[columns],
943
            df_synthetic_buildings_with_amenities[columns],
944
            df_buildings_without_amenities[columns],
945
            df_synthetic_buildings_without_amenities[columns],
946
        ],
947
        axis=0,
948
        ignore_index=True,
949
    )
950
    # TODO maybe remove after #772
951
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
952
953
    # Write table to db for debugging
954
    # TODO remove later
955
    df_cts_buildings = gpd.GeoDataFrame(
956
        df_cts_buildings, geometry="geom_building", crs=3035
957
    )
958
    df_cts_buildings = df_cts_buildings.reset_index().rename(
959
        columns={"index": "serial"}
960
    )
961
    write_table_to_postgis(
962
        df_cts_buildings,
963
        CtsBuildings,
964
        drop=True,
965
    )
966
967
    df_demand_share_2035 = calc_building_demand_profile_share(
968
        df_cts_buildings, scenario="eGon2035"
969
    )
970
    df_demand_share_100RE = calc_building_demand_profile_share(
971
        df_cts_buildings, scenario="eGon100RE"
972
    )
973
974
    df_demand_share = pd.concat(
975
        [df_demand_share_2035, df_demand_share_100RE],
976
        axis=0,
977
        ignore_index=True,
978
    )
979
980
    write_table_to_postgres(
981
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
982
    )
983
984
    return df_cts_buildings, df_demand_share
985
986
987
def get_peak_load_cts_buildings():
988
    """
989
    Get peak load of all CTS buildings for both scenarios and store in DB.
990
    """
991
992
    # TODO Check units, maybe MwH?
993
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
994
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename(
995
        "cts_peak_load_in_w_2035"
996
    )
997
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
998
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename(
999
        "cts_peak_load_in_w_100RE"
1000
    )
1001
    df_peak_load = pd.concat(
1002
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1003
    ).reset_index()
1004
1005
    df_peak_load["type"] = "cts"
1006
1007
    # Delete rows with cts demand
1008
    with db.session_scope() as session:
1009
        session.query(BuildingPeakLoads).filter(
1010
            BuildingPeakLoads.type == "cts"
1011
        ).delete()
1012
1013
    # Write peak loads into db
1014
    with db.session_scope() as session:
1015
        session.bulk_insert_mappings(
1016
            BuildingPeakLoads,
1017
            df_peak_load.to_dict(orient="records"),
1018
        )
1019
1020
1021
def delete_synthetic_cts_buildings():
1022
    """
1023
    All synthetic cts buildings are deleted from the DB. This is necessary if
1024
    the task is run multiple times as the existing synthetic buildings
1025
    influence the results.
1026
    """
1027
    # import db tables
1028
    from saio.openstreetmap import osm_buildings_synthetic
1029
1030
    # cells mit amenities
1031
    with db.session_scope() as session:
1032
        session.query(osm_buildings_synthetic).filter(
1033
            osm_buildings_synthetic.building == "cts"
1034
        ).delete()
1035
1036
1037
class CtsElectricityBuildings(Dataset):
1038
    def __init__(self, dependencies):
1039
        super().__init__(
1040
            name="CtsElectricityBuildings",
1041
            version="0.0.0.",
1042
            dependencies=dependencies,
1043
            tasks=(
1044
                cts_to_buildings,
1045
                get_peak_load_cts_buildings,
1046
                # get_all_cts_building_profiles,
1047
            ),
1048
        )
1049