Passed
Pull Request — dev (#826)
by
unknown
02:15
created

data.datasets.electricity_demand_timeseries.cts_buildings   A

Complexity

Total Complexity 41

Size/Duplication

Total Lines 1047
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 41
eloc 531
dl 0
loc 1047
rs 9.1199
c 0
b 0
f 0

14 Functions

Rating   Name   Duplication   Size   Complexity  
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A amenities_without_buildings() 0 60 2
A select_cts_buildings() 0 23 1
B cells_with_cts_demand_only() 0 72 2
A write_synthetic_buildings_to_db() 0 31 2
B buildings_with_amenities() 0 153 4
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2
A delete_synthetic_cts_buildings() 0 14 2
A get_peak_load_cts_buildings() 0 26 2
C calc_building_profiles() 0 72 9
B cts_to_buildings() 0 155 2
A calc_building_demand_profile_share() 0 65 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    OsmBuildingsSynthetic,
17
)
18
from egon.data.datasets.electricity_demand_timeseries.tools import (
19
    random_ints_until_sum,
20
    random_point_in_square,
21
    specific_int_until_sum,
22
    write_table_to_postgis,
23
    write_table_to_postgres,
24
)
25
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
26
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
27
import egon.data.config
28
29
engine = db.engine()
30
Base = declarative_base()
31
32
data_config = egon.data.config.datasets()
33
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
34
35
# import db tables
36
saio.register_schema("openstreetmap", engine=engine)
37
saio.register_schema("society", engine=engine)
38
saio.register_schema("demand", engine=engine)
39
saio.register_schema("boundaries", engine=engine)
40
41
42
class EgonCtsElectricityDemandBuildingShare(Base):
43
    __tablename__ = "egon_cts_electricity_demand_building_share"
44
    __table_args__ = {"schema": "demand"}
45
46
    id = Column(Integer, primary_key=True)
47
    scenario = Column(String, primary_key=True)
48
    bus_id = Column(Integer, index=True)
49
    profile_share = Column(Float)
50
51
52
class CtsPeakLoads(Base):
53
    __tablename__ = "egon_cts_peak_loads"
54
    __table_args__ = {"schema": "demand"}
55
56
    id = Column(String, primary_key=True)
57
    cts_peak_load_in_w_2035 = Column(REAL)
58
    cts_peak_load_in_w_100RE = Column(REAL)
59
60
61
class CtsBuildings(Base):
62
    __tablename__ = "egon_cts_buildings"
63
    __table_args__ = {"schema": "openstreetmap"}
64
65
    serial = Column(Integer, primary_key=True)
66
    id = Column(Integer)
67
    zensus_population_id = Column(Integer, index=True)
68
    geom_building = Column(Geometry("Polygon", 3035))
69
    n_amenities_inside = Column(Integer)
70
    source = Column(String)
71
72
73
def amenities_without_buildings():
74
    """
75
    Amenities which have no buildings assigned and are in
76
    a cell with cts demand are determined.
77
78
    Returns
79
    -------
80
    pd.DataFrame
81
        Table of amenities without buildings
82
    """
83
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
84
85
    with db.session_scope() as session:
86
        cells_query = (
87
            session.query(
88
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
89
                # TODO can be used for square around amenity
90
                #  (1 geom_amenity: 1 geom_building)
91
                #  not unique amenity_ids yet
92
                osm_amenities_not_in_buildings_filtered.geom_amenity,
93
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
94
                # EgonDemandRegioZensusElectricity.demand,
95
                # # TODO can be used to generate n random buildings
96
                # # (n amenities : 1 randombuilding)
97
                # func.count(
98
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
99
                # ).label("n_amenities_inside"),
100
                # DestatisZensusPopulationPerHa.geom,
101
            )
102
            .filter(
103
                func.st_within(
104
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
105
                    DestatisZensusPopulationPerHa.geom,
106
                )
107
            )
108
            .filter(
109
                DestatisZensusPopulationPerHa.id
110
                == EgonDemandRegioZensusElectricity.zensus_population_id
111
            )
112
            .filter(
113
                EgonDemandRegioZensusElectricity.sector == "service",
114
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
115
                #         ).group_by(
116
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
117
                #             DestatisZensusPopulationPerHa.geom,
118
            )
119
        )
120
    # # TODO can be used to generate n random buildings
121
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
122
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
123
    # )
124
    #
125
126
    # # TODO can be used for square around amenity
127
    df_amenities_without_buildings = gpd.read_postgis(
128
        cells_query.statement,
129
        cells_query.session.bind,
130
        geom_col="geom_amenity",
131
    )
132
    return df_amenities_without_buildings
133
134
135
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
136
    """
137
    Building centroids are placed randomly within census cells.
138
    The Number of buildings is derived from n_amenity_inside, the selected
139
    method and number of amenities per building.
140
141
    Returns
142
    -------
143
    df: gpd.GeoDataFrame
144
        Table of buildings centroids
145
    """
146
    if isinstance(max_amenities, int):
147
        # amount of amenities is randomly generated within bounds
148
        # (max_amenities, amenities per cell)
149
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
150
            random_ints_until_sum, args=[max_amenities]
151
        )
152
    if isinstance(amenities, int):
153
        # Specific amount of amenities per building
154
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
155
            specific_int_until_sum, args=[amenities]
156
        )
157
158
    # Unnest each building
159
    df = df.explode(column="n_amenities_inside")
160
161
    # building count per cell
162
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
163
164
    # generate random synthetic buildings
165
    edge_length = 5
166
    # create random points within census cells
167
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
168
169
    df.reset_index(drop=True, inplace=True)
170
    # Store center of polygon
171
    df["geom_point"] = points
172
    # Drop geometry of census cell
173
    df = df.drop(columns=["geom"])
174
175
    return df
176
177
178
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
179
    """
180
    Synthetic buildings are generated around points.
181
182
    Parameters
183
    ----------
184
    df: pd.DataFrame
185
        Table of census cells
186
    points: gpd.GeoSeries or str
187
        List of points to place buildings around or column name of df
188
    crs: str
189
        CRS of result table
190
191
    Returns
192
    -------
193
    df: gpd.GeoDataFrame
194
        Synthetic buildings
195
    """
196
197
    if isinstance(points, str) and points in df.columns:
198
        points = df[points]
199
    elif isinstance(points, gpd.GeoSeries):
200
        pass
201
    else:
202
        raise ValueError("Points are of the wrong type")
203
204
    # Create building using a square around point
205
    edge_length = 5
206
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
207
208
    if "geom_point" not in df.columns:
209
        df["geom_point"] = df["geom_building"].centroid
210
211
    # TODO Check CRS
212
    df = gpd.GeoDataFrame(
213
        df,
214
        crs=crs,
215
        geometry="geom_building",
216
    )
217
218
    # TODO remove after implementation of egon_building_id
219
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
220
221
    # get max number of building ids from synthetic residential table
222
    with db.session_scope() as session:
223
        max_synth_residential_id = session.execute(
224
            func.max(OsmBuildingsSynthetic.id)
225
        ).scalar()
226
    max_synth_residential_id = int(max_synth_residential_id)
227
228
    # create sequential ids
229
    df["egon_building_id"] = range(
230
        max_synth_residential_id + 1,
231
        max_synth_residential_id + df.shape[0] + 1,
232
    )
233
234
    df["area"] = df["geom_building"].area
235
    # set building type of synthetic building
236
    df["building"] = "cts"
237
    # TODO remove in #772
238
    df = df.rename(
239
        columns={
240
            # "zensus_population_id": "cell_id",
241
            "egon_building_id": "id",
242
        }
243
    )
244
    return df
245
246
247
def buildings_with_amenities():
248
    """
249
    Amenities which are assigned to buildings are determined
250
    and grouped per building and zensus cell. Buildings
251
    covering multiple cells therefore exists multiple times
252
    but in different zensus cells. This is necessary to cover
253
    all cells with a cts demand. If buildings exist in multiple
254
    substations, their amenities are summed and assigned and kept in
255
    one substation only. If as a result, a census cell is uncovered,
256
    a synthetic amenity is placed. The buildings are aggregated
257
    afterwards during the calculation of the profile_share.
258
259
    Returns
260
    -------
261
    df_buildings_with_amenities: gpd.GeoDataFrame
262
        Contains all buildings with amenities per zensus cell.
263
    df_lost_cells: gpd.GeoDataFrame
264
        Contains synthetic amenities in lost cells. Might be empty
265
    """
266
267
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
268
269
    with db.session_scope() as session:
270
        cells_query = (
271
            session.query(
272
                osm_amenities_in_buildings_filtered,
273
                MapZensusGridDistricts.bus_id,
274
            )
275
            .filter(
276
                MapZensusGridDistricts.zensus_population_id
277
                == osm_amenities_in_buildings_filtered.zensus_population_id
278
            )
279
            .filter(
280
                EgonDemandRegioZensusElectricity.zensus_population_id
281
                == osm_amenities_in_buildings_filtered.zensus_population_id
282
            )
283
            .filter(
284
                EgonDemandRegioZensusElectricity.sector == "service",
285
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
286
            )
287
        )
288
    df_amenities_in_buildings = pd.read_sql(
289
        cells_query.statement, cells_query.session.bind, index_col=None
290
    )
291
292
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
293
        "geom_building"
294
    ].apply(to_shape)
295
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
296
        "geom_amenity"
297
    ].apply(to_shape)
298
299
    df_amenities_in_buildings["n_amenities_inside"] = 1
300
301
    # add identifier column for buildings in multiple substations
302
    df_amenities_in_buildings[
303
        "duplicate_identifier"
304
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
305
        "n_amenities_inside"
306
    ].transform(
307
        "cumsum"
308
    )
309
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
310
        ["id", "duplicate_identifier"]
311
    )
312
    # sum amenities of buildings with multiple substations
313
    df_amenities_in_buildings[
314
        "n_amenities_inside"
315
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
316
        "n_amenities_inside"
317
    ].transform(
318
        "sum"
319
    )
320
    # identify lost zensus cells
321
    df_lost_cells = df_amenities_in_buildings.loc[
322
        df_amenities_in_buildings.duplicated(
323
            subset=["id", "duplicate_identifier"], keep="first"
324
        )
325
    ]["zensus_population_id"]
326
327
    # drop duplicated buildings
328
    df_amenities_in_buildings.drop_duplicates(
329
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
330
    )
331
332
    # check if lost zensus cells are already covered
333
    if not (
334
        df_amenities_in_buildings["zensus_population_id"]
335
        .isin(df_lost_cells)
336
        .any()
337
    ):
338
        # query geom data for cell if not
339
        with db.session_scope() as session:
340
            cells_query = session.query(
341
                DestatisZensusPopulationPerHa.id,
342
                DestatisZensusPopulationPerHa.geom,
343
            ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
344
345
        df_lost_cells = gpd.read_postgis(
346
            cells_query.statement,
347
            cells_query.session.bind,
348
            geom_col="geom",
349
        )
350
        # TODO maybe adapt method
351
        # place random amenity in cell
352
        df_lost_cells["n_amenities_inside"] = 1
353
        df_lost_cells = place_buildings_with_amenities(
354
            df_lost_cells, amenities=1
355
        )
356
        df_lost_cells.rename(
357
            columns={
358
                "id": "zensus_population_id",
359
                "geom_point": "geom_amenity",
360
            },
361
            inplace=True,
362
        )
363
        df_lost_cells.drop(
364
            columns=["building_count", "n_amenities_inside"], inplace=True
365
        )
366
    else:
367
        df_lost_cells = None
368
369
    # drop helper columns
370
    df_amenities_in_buildings.drop(
371
        columns=["duplicate_identifier"], inplace=True
372
    )
373
374
    # sum amenities per building and cell
375
    df_amenities_in_buildings[
376
        "n_amenities_inside"
377
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
378
        "n_amenities_inside"
379
    ].transform(
380
        "sum"
381
    )
382
    # drop duplicated buildings
383
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
384
        ["id", "zensus_population_id"]
385
    )
386
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
387
388
    df_buildings_with_amenities = df_buildings_with_amenities[
389
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
390
    ]
391
    df_buildings_with_amenities.rename(
392
        columns={
393
            # "zensus_population_id": "cell_id",
394
            "egon_building_id": "id"
395
        },
396
        inplace=True,
397
    )
398
399
    return df_buildings_with_amenities, df_lost_cells
400
401
402
# TODO Remove as depricated
403
def write_synthetic_buildings_to_db(df_synthetic_buildings):
404
    """"""
405
    if "geom_point" not in df_synthetic_buildings.columns:
406
        df_synthetic_buildings["geom_point"] = df_synthetic_buildings[
407
            "geom_building"
408
        ].centroid
409
410
    df_synthetic_buildings = df_synthetic_buildings.rename(
411
        columns={
412
            "zensus_population_id": "cell_id",
413
            "egon_building_id": "id",
414
        }
415
    )
416
    # Only take existing columns
417
    columns = [
418
        column.key for column in OsmBuildingsSynthetic.__table__.columns
419
    ]
420
    df_synthetic_buildings = df_synthetic_buildings.loc[:, columns]
421
422
    dtypes = {
423
        i: OsmBuildingsSynthetic.__table__.columns[i].type
424
        for i in OsmBuildingsSynthetic.__table__.columns.keys()
425
    }
426
427
    # Write new buildings incl coord into db
428
    df_synthetic_buildings.to_postgis(
429
        name=OsmBuildingsSynthetic.__tablename__,
430
        con=engine,
431
        if_exists="append",
432
        schema=OsmBuildingsSynthetic.__table_args__["schema"],
433
        dtype=dtypes,
434
    )
435
436
437
def buildings_without_amenities():
438
    """
439
    Buildings (filtered and synthetic) in cells with
440
    cts demand but no amenities are determined.
441
442
    Returns
443
    -------
444
    df_buildings_without_amenities: gpd.GeoDataFrame
445
        Table of buildings without amenities in zensus cells
446
        with cts demand.
447
    """
448
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
449
    from saio.openstreetmap import (
450
        osm_amenities_shops_filtered,
451
        osm_buildings_filtered,
452
        osm_buildings_synthetic,
453
    )
454
455
    # buildings_filtered in cts-demand-cells without amenities
456
    with db.session_scope() as session:
457
458
        # Synthetic Buildings
459
        q_synth_buildings = session.query(
460
            osm_buildings_synthetic.cell_id.cast(Integer).label(
461
                "zensus_population_id"
462
            ),
463
            osm_buildings_synthetic.id.cast(Integer).label("id"),
464
            osm_buildings_synthetic.area.label("area"),
465
            osm_buildings_synthetic.geom_building.label("geom_building"),
466
            osm_buildings_synthetic.geom_point.label("geom_point"),
467
        )
468
469
        # Buildings filtered
470
        q_buildings_filtered = session.query(
471
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
472
            osm_buildings_filtered.id,
473
            osm_buildings_filtered.area,
474
            osm_buildings_filtered.geom_building,
475
            osm_buildings_filtered.geom_point,
476
        ).filter(
477
            osm_buildings_filtered.id
478
            == egon_map_zensus_buildings_filtered_all.id
479
        )
480
481
        # Amenities + zensus_population_id
482
        q_amenities = (
483
            session.query(
484
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
485
            )
486
            .filter(
487
                func.st_within(
488
                    osm_amenities_shops_filtered.geom_amenity,
489
                    DestatisZensusPopulationPerHa.geom,
490
                )
491
            )
492
            .distinct(DestatisZensusPopulationPerHa.id)
493
        )
494
495
        # Cells with CTS demand but without amenities
496
        q_cts_without_amenities = (
497
            session.query(
498
                EgonDemandRegioZensusElectricity.zensus_population_id,
499
            )
500
            .filter(
501
                EgonDemandRegioZensusElectricity.sector == "service",
502
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
503
            )
504
            .filter(
505
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
506
                    q_amenities
507
                )
508
            )
509
            .distinct()
510
        )
511
512
        # Buildings filtered + synthetic buildings residential in
513
        # cells with CTS demand but without amenities
514
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
515
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
516
                q_cts_without_amenities
517
            )
518
        )
519
520
    # df_buildings_without_amenities = pd.read_sql(
521
    #     cells_query.statement, cells_query.session.bind, index_col=None)
522
    df_buildings_without_amenities = gpd.read_postgis(
523
        cells_query.statement,
524
        cells_query.session.bind,
525
        geom_col="geom_building",
526
    )
527
528
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
529
        columns={
530
            # "zensus_population_id": "cell_id",
531
            "egon_building_id": "id",
532
        }
533
    )
534
535
    return df_buildings_without_amenities
536
537
538
def select_cts_buildings(df_buildings_wo_amenities):
539
    """
540
    Buildings (filtered and synthetic) in cells with
541
    cts demand are selected. Only the first building
542
    is taken for each cell and 1 amenities is assigned.
543
544
    Returns
545
    -------
546
    df_buildings_with_cts_demand: gpd.GeoDataFrame
547
        Table of buildings
548
    """
549
    # TODO Adapt method
550
    # Select one building each cell
551
    # take the first
552
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
553
        # subset="cell_id", keep="first"
554
        subset="zensus_population_id",
555
        keep="first",
556
    ).reset_index(drop=True)
557
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
558
    df_buildings_with_cts_demand["building"] = "cts"
559
560
    return df_buildings_with_cts_demand
561
562
563
def cells_with_cts_demand_only(df_buildings_without_amenities):
564
    """
565
    Cells with cts demand but no amenities or buildilngs
566
    are determined.
567
568
    Returns
569
    -------
570
    df_cells_only_cts_demand: gpd.GeoDataFrame
571
        Table of cells with cts demand but no amenities or buildings
572
    """
573
    from saio.openstreetmap import osm_amenities_shops_filtered
574
575
    # cells mit amenities
576
    with db.session_scope() as session:
577
        sub_query = (
578
            session.query(
579
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
580
            )
581
            .filter(
582
                func.st_within(
583
                    osm_amenities_shops_filtered.geom_amenity,
584
                    DestatisZensusPopulationPerHa.geom,
585
                )
586
            )
587
            .distinct(DestatisZensusPopulationPerHa.id)
588
        )
589
590
        cells_query = (
591
            session.query(
592
                EgonDemandRegioZensusElectricity.zensus_population_id,
593
                EgonDemandRegioZensusElectricity.scenario,
594
                EgonDemandRegioZensusElectricity.sector,
595
                EgonDemandRegioZensusElectricity.demand,
596
                DestatisZensusPopulationPerHa.geom,
597
            )
598
            .filter(
599
                EgonDemandRegioZensusElectricity.sector == "service",
600
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
601
            )
602
            .filter(
603
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
604
                    sub_query
605
                )
606
            )
607
            .filter(
608
                EgonDemandRegioZensusElectricity.zensus_population_id
609
                == DestatisZensusPopulationPerHa.id
610
            )
611
        )
612
613
    df_cts_cell_without_amenities = gpd.read_postgis(
614
        cells_query.statement,
615
        cells_query.session.bind,
616
        geom_col="geom",
617
        index_col=None,
618
    )
619
620
    # TODO maybe remove
621
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
622
        columns={"cell_id": "zensus_population_id"}
623
    )
624
625
    # Census cells with only cts demand
626
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
627
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
628
            df_buildings_without_amenities["zensus_population_id"].unique()
629
        )
630
    ]
631
632
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
633
634
    return df_cells_only_cts_demand
635
636
637
def calc_census_cell_share(scenario="eGon2035"):
638
    """
639
    The profile share for each census cell is calculated by it's
640
    share of annual demand per substation bus. The annual demand
641
    per cell is defined by DemandRegio. The share is for both
642
    scenarios identical as the annual demand is linearly scaled.
643
644
    Parameters
645
    ----------
646
    scenario: str
647
        Scenario for which the share is calculated.
648
649
    Returns
650
    -------
651
    df_census_share: pd.DataFrame
652
    """
653
654
    with db.session_scope() as session:
655
        cells_query = (
656
            session.query(
657
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
658
            )
659
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
660
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
661
            .filter(
662
                EgonDemandRegioZensusElectricity.zensus_population_id
663
                == MapZensusGridDistricts.zensus_population_id
664
            )
665
        )
666
667
    df_demand_regio_electricity_demand = pd.read_sql(
668
        cells_query.statement,
669
        cells_query.session.bind,
670
        index_col="zensus_population_id",
671
    )
672
673
    # get demand share of cell per bus
674
    df_census_share = df_demand_regio_electricity_demand[
675
        "demand"
676
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
677
        "demand"
678
    ].transform(
679
        "sum"
680
    )
681
    df_census_share = df_census_share.rename("cell_share")
682
683
    df_census_share = pd.concat(
684
        [
685
            df_census_share,
686
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
687
        ],
688
        axis=1,
689
    )
690
691
    df_census_share.reset_index(inplace=True)
692
    return df_census_share
693
694
695
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
696
    """
697
    Share of cts electricity demand profile per bus for every selected building
698
    is calculated. Building-amenity share is multiplied with census cell share
699
    to get the substation bus profile share for each building. The share is
700
    grouped and aggregated per building as some cover multiple cells.
701
702
    Parameters
703
    ----------
704
    df_cts_buildings: gpd.GeoDataFrame
705
        Table of all buildings with cts demand assigned
706
    scenario: str
707
        Scenario for which the share is calculated.
708
709
    Returns
710
    -------
711
    df_building_share: pd.DataFrame
712
        Table of bus profile share per building
713
714
    """
715
716
    def calc_building_amenity_share(df_cts_buildings):
717
        """
718
        Calculate the building share by the number amenities per building
719
        within a census cell.
720
        """
721
        df_building_amenity_share = df_cts_buildings[
722
            "n_amenities_inside"
723
        ] / df_cts_buildings.groupby("zensus_population_id")[
724
            "n_amenities_inside"
725
        ].transform(
726
            "sum"
727
        )
728
        df_building_amenity_share = pd.concat(
729
            [
730
                df_building_amenity_share.rename("building_amenity_share"),
731
                df_cts_buildings[["zensus_population_id", "id"]],
732
            ],
733
            axis=1,
734
        )
735
        return df_building_amenity_share
736
737
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
738
739
    df_census_cell_share = calc_census_cell_share(scenario)
740
741
    df_demand_share = pd.merge(
742
        left=df_building_amenity_share,
743
        right=df_census_cell_share,
744
        left_on="zensus_population_id",
745
        right_on="zensus_population_id",
746
    )
747
    df_demand_share["profile_share"] = df_demand_share[
748
        "building_amenity_share"
749
    ].multiply(df_demand_share["cell_share"])
750
751
    df_demand_share = df_demand_share[
752
        ["id", "bus_id", "scenario", "profile_share"]
753
    ]
754
    # Group and aggregate per building for multi cell buildings
755
    df_demand_share = (
756
        df_demand_share.groupby(["scenario", "id"]).sum().reset_index()
757
    )
758
759
    return df_demand_share
760
761
762
def calc_building_profiles(
763
    df_demand_share=None,
764
    egon_building_id=None,
765
    bus_id=None,
766
    scenario="eGon2035",
767
):
768
    """
769
    Calculate the demand profile for each building. The profile is
770
    calculated by the demand share of the building per substation bus.
771
772
    Parameters
773
    ----------
774
    df_demand_share: pd.DataFrame
775
        Table of demand share per building. If not given, table is
776
        sourced from database.
777
    egon_building_id: int
778
        Id of the building for which the profile is calculated. If not
779
        given, the profiles are calculated for all buildings.
780
781
    Returns
782
    -------
783
    df_building_profiles: pd.DataFrame
784
        Table of demand profile per building
785
    """
786
787
    if not isinstance(df_demand_share, pd.DataFrame):
788
        with db.session_scope() as session:
789
            cells_query = session.query(
790
                EgonCtsElectricityDemandBuildingShare,
791
            ).filter(
792
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
793
            )
794
795
        df_demand_share = pd.read_sql(
796
            cells_query.statement, cells_query.session.bind, index_col=None
797
        )
798
    else:
799
        df_demand_share = df_demand_share.loc[
800
            df_demand_share["scenario"] == scenario
801
        ]
802
803
    df_cts_profiles = calc_load_curves_cts(scenario)
804
805
    # get demand share of selected building id
806
    if isinstance(egon_building_id, int):
807
        if egon_building_id in df_demand_share["id"]:
808
            df_demand_share = df_demand_share.loc[
809
                df_demand_share["id"] == egon_building_id
810
            ]
811
        else:
812
            raise KeyError(f"Building with id {egon_building_id} not found")
813
814
    # get demand share of all buildings for selected bus id
815
    if isinstance(bus_id, int):
816
        if bus_id in df_demand_share["bus_id"]:
817
            df_demand_share = df_demand_share.loc[
818
                df_demand_share["bus_id"] == bus_id
819
            ]
820
        else:
821
            raise KeyError(f"Bus with id {bus_id} not found")
822
823
    # get demand profile for all buildings for selected demand share
824
    df_building_profiles = pd.DataFrame()
825
    for bus_id, df in df_demand_share.groupby("bus_id"):
826
        shares = df.set_index("id", drop=True)["profile_share"]
827
        profile = df_cts_profiles.loc[:, bus_id]
828
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 825 is not entered. Are you sure this can never be the case?
Loading history...
829
        df_building_profiles = pd.concat(
830
            [df_building_profiles, building_profiles], axis=1
831
        )
832
833
    return df_building_profiles
834
835
836
def cts_to_buildings():
837
    """
838
    Assigns CTS demand to buildings and calculates the respective demand
839
    profiles. The demand profile per substation are disaggregated per
840
    annual demand share of each census cell and by the number of amenities
841
    per building within the cell. If no building data is available,
842
    synthetic buildings are generated around the amenities. If no amenities
843
    but cts demand is available, buildings are randomly selected. If no
844
    building nor amenity is available, random synthetic buildings are
845
    generated. The demand share is stored in the database.
846
847
    Note:
848
    -----
849
    Cells with CTS demand, amenities and buildings do not change within
850
    the scenarios, only the demand itself. Therefore scenario eGon2035
851
    can be used universally to determine the cts buildings but not for
852
    he demand share.
853
    """
854
855
    # Buildings with amenities
856
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
857
858
    # Remove synthetic CTS buildings if existing
859
    delete_synthetic_cts_buildings()
860
861
    # Create synthetic buildings for amenites without buildings
862
    df_amenities_without_buildings = amenities_without_buildings()
863
    if df_lost_cells:
864
        df_amenities_without_buildings = df_amenities_without_buildings.append(
865
            df_lost_cells, ignore_index=True
866
        )
867
868
    df_amenities_without_buildings["n_amenities_inside"] = 1
869
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
870
        df_amenities_without_buildings, points="geom_amenity"
871
    )
872
873
    # TODO write to DB and remove renaming
874
    # write_synthetic_buildings_to_db(df_synthetic_buildings_with_amenities)
875
    write_table_to_postgis(
876
        df_synthetic_buildings_with_amenities.rename(
877
            columns={
878
                "zensus_population_id": "cell_id",
879
                "egon_building_id": "id",
880
            }
881
        ),
882
        OsmBuildingsSynthetic,
883
        drop=False,
884
    )
885
886
    # Cells without amenities but CTS demand and buildings
887
    df_buildings_without_amenities = buildings_without_amenities()
888
889
    # TODO Fix Adhoc Bugfix duplicated buildings
890
    mask = df_buildings_without_amenities.loc[
891
        df_buildings_without_amenities["id"].isin(
892
            df_buildings_with_amenities["id"]
893
        )
894
    ].index
895
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
896
        index=mask
897
    ).reset_index(drop=True)
898
899
    df_buildings_without_amenities = select_cts_buildings(
900
        df_buildings_without_amenities
901
    )
902
    df_buildings_without_amenities["n_amenities_inside"] = 1
903
904
    # Create synthetic amenities and buildings in cells with only CTS demand
905
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
906
        df_buildings_without_amenities
907
    )
908
    # Only 1 Amenity per cell
909
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
910
    # Only 1 Amenity per Building
911
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
912
        df_cells_with_cts_demand_only, amenities=1
913
    )
914
    # Leads to only 1 building per cell
915
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
916
        df_cells_with_cts_demand_only, points="geom_point"
917
    )
918
919
    # TODO write to DB and remove renaming
920
    # write_synthetic_buildings_to_db(df_synthetic_buildings_without_amenities)
921
    write_table_to_postgis(
922
        df_synthetic_buildings_without_amenities.rename(
923
            columns={
924
                "zensus_population_id": "cell_id",
925
                "egon_building_id": "id",
926
            }
927
        ),
928
        OsmBuildingsSynthetic,
929
        drop=False,
930
    )
931
932
    # Concat all buildings
933
    columns = [
934
        "zensus_population_id",
935
        "id",
936
        "geom_building",
937
        "n_amenities_inside",
938
        "source",
939
    ]
940
941
    df_buildings_with_amenities["source"] = "bwa"
942
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
943
    df_buildings_without_amenities["source"] = "bwoa"
944
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
945
946
    df_cts_buildings = pd.concat(
947
        [
948
            df_buildings_with_amenities[columns],
949
            df_synthetic_buildings_with_amenities[columns],
950
            df_buildings_without_amenities[columns],
951
            df_synthetic_buildings_without_amenities[columns],
952
        ],
953
        axis=0,
954
        ignore_index=True,
955
    )
956
    # TODO maybe remove after #772
957
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
958
959
    # Write table to db for debugging
960
    # TODO remove later
961
    df_cts_buildings = gpd.GeoDataFrame(
962
        df_cts_buildings, geometry="geom_building", crs=3035
963
    )
964
    df_cts_buildings = df_cts_buildings.reset_index().rename(
965
        columns={"index": "serial"}
966
    )
967
    write_table_to_postgis(
968
        df_cts_buildings,
969
        CtsBuildings,
970
        drop=True,
971
    )
972
973
    df_demand_share_2035 = calc_building_demand_profile_share(
974
        df_cts_buildings, scenario="eGon2035"
975
    )
976
    df_demand_share_100RE = calc_building_demand_profile_share(
977
        df_cts_buildings, scenario="eGon100RE"
978
    )
979
980
    df_demand_share = pd.concat(
981
        [df_demand_share_2035, df_demand_share_100RE],
982
        axis=0,
983
        ignore_index=True,
984
    )
985
986
    write_table_to_postgres(
987
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
988
    )
989
990
    return df_cts_buildings, df_demand_share
991
992
993
def get_peak_load_cts_buildings():
994
    """
995
    Get peak load of all CTS buildings for both scenarios and store in DB.
996
    """
997
998
    # TODO Check units, maybe MwH?
999
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
1000
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename(
1001
        "cts_peak_load_in_w_2035"
1002
    )
1003
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
1004
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename(
1005
        "cts_peak_load_in_w_100RE"
1006
    )
1007
    df_peak_load = pd.concat(
1008
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1009
    ).reset_index()
1010
1011
    CtsPeakLoads.__table__.drop(bind=engine, checkfirst=True)
1012
    CtsPeakLoads.__table__.create(bind=engine, checkfirst=True)
1013
1014
    # Write peak loads into db
1015
    with db.session_scope() as session:
1016
        session.bulk_insert_mappings(
1017
            CtsPeakLoads,
1018
            df_peak_load.to_dict(orient="records"),
1019
        )
1020
1021
1022
def delete_synthetic_cts_buildings():
1023
    """
1024
    All synthetic cts buildings are deleted from the DB. This is necessary if
1025
    the task is run multiple times as the existing synthetic buildings
1026
    influence the results.
1027
    """
1028
    # import db tables
1029
    from saio.openstreetmap import osm_buildings_synthetic
1030
1031
    # cells mit amenities
1032
    with db.session_scope() as session:
1033
        session.query(osm_buildings_synthetic).filter(
1034
            osm_buildings_synthetic.building == "cts"
1035
        ).delete()
1036
1037
1038
class CtsElectricityBuildings(Dataset):
1039
    def __init__(self, dependencies):
1040
        super().__init__(
1041
            name="CtsElectricityBuildings",
1042
            version="0.0.0.",
1043
            dependencies=dependencies,
1044
            tasks=(
1045
                cts_to_buildings,
1046
                get_peak_load_cts_buildings,
1047
                # get_all_cts_building_profiles,
1048
            ),
1049
        )
1050