Passed
Pull Request — dev (#826)
by
unknown
01:26
created

data.datasets.electricity_demand_timeseries.cts_buildings   A

Complexity

Total Complexity 41

Size/Duplication

Total Lines 1049
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 41
eloc 535
dl 0
loc 1049
rs 9.1199
c 0
b 0
f 0

14 Functions

Rating   Name   Duplication   Size   Complexity  
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A amenities_without_buildings() 0 60 2
A delete_synthetic_cts_buildings() 0 14 2
A get_peak_load_cts_buildings() 0 26 2
C calc_building_profiles() 0 64 9
A select_cts_buildings() 0 23 1
B cells_with_cts_demand_only() 0 72 2
A write_synthetic_buildings_to_db() 0 31 2
B cts_to_buildings() 0 166 2
A calc_building_demand_profile_share() 0 67 1
B buildings_with_amenities() 0 150 4
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    OsmBuildingsSynthetic,
17
)
18
from egon.data.datasets.electricity_demand_timeseries.tools import (
19
    random_ints_until_sum,
20
    random_point_in_square,
21
    specific_int_until_sum,
22
    write_table_to_postgis,
23
    write_table_to_postgres,
24
)
25
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
26
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
27
import egon.data.config
28
29
engine = db.engine()
30
Base = declarative_base()
31
32
data_config = egon.data.config.datasets()
33
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
34
35
# import db tables
36
saio.register_schema("openstreetmap", engine=engine)
37
saio.register_schema("society", engine=engine)
38
saio.register_schema("demand", engine=engine)
39
saio.register_schema("boundaries", engine=engine)
40
41
42
class EgonCtsElectricityDemandBuildingShare(Base):
43
    __tablename__ = "egon_cts_electricity_demand_building_share"
44
    __table_args__ = {"schema": "demand"}
45
46
    id = Column(Integer, primary_key=True)
47
    scenario = Column(String, primary_key=True)
48
    bus_id = Column(Integer, index=True)
49
    profile_share = Column(Float)
50
51
52
class CtsPeakLoads(Base):
53
    __tablename__ = "egon_cts_peak_loads"
54
    __table_args__ = {"schema": "demand"}
55
56
    id = Column(String, primary_key=True)
57
    cts_peak_load_in_w_2035 = Column(REAL)
58
    cts_peak_load_in_w_100RE = Column(REAL)
59
60
61
class CtsBuildings(Base):
62
    __tablename__ = "egon_cts_buildings"
63
    __table_args__ = {"schema": "openstreetmap"}
64
65
    serial = Column(Integer, primary_key=True)
66
    id = Column(Integer)
67
    zensus_population_id = Column(Integer, index=True)
68
    geom_building = Column(Geometry("Polygon", 3035))
69
    n_amenities_inside = Column(Integer)
70
    source = Column(String)
71
72
73
def amenities_without_buildings():
74
    """
75
    Amenities which have no buildings assigned and are in
76
    a cell with cts demand are determined.
77
78
    Returns
79
    -------
80
    pd.DataFrame
81
        Table of amenities without buildings
82
    """
83
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
84
85
    with db.session_scope() as session:
86
        cells_query = (
87
            session.query(
88
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
89
                # TODO can be used for square around amenity
90
                #  (1 geom_amenity: 1 geom_building)
91
                #  not unique amenity_ids yet
92
                osm_amenities_not_in_buildings_filtered.geom_amenity,
93
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
94
                # EgonDemandRegioZensusElectricity.demand,
95
                # # TODO can be used to generate n random buildings
96
                # # (n amenities : 1 randombuilding)
97
                # func.count(
98
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
99
                # ).label("n_amenities_inside"),
100
                # DestatisZensusPopulationPerHa.geom,
101
            )
102
            .filter(
103
                func.st_within(
104
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
105
                    DestatisZensusPopulationPerHa.geom,
106
                )
107
            )
108
            .filter(
109
                DestatisZensusPopulationPerHa.id
110
                == EgonDemandRegioZensusElectricity.zensus_population_id
111
            )
112
            .filter(
113
                EgonDemandRegioZensusElectricity.sector == "service",
114
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
115
                #         ).group_by(
116
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
117
                #             DestatisZensusPopulationPerHa.geom,
118
            )
119
        )
120
    # # TODO can be used to generate n random buildings
121
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
122
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
123
    # )
124
    #
125
126
    # # TODO can be used for square around amenity
127
    df_amenities_without_buildings = gpd.read_postgis(
128
        cells_query.statement,
129
        cells_query.session.bind,
130
        geom_col="geom_amenity",
131
    )
132
    return df_amenities_without_buildings
133
134
135
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
136
    """
137
    Building centroids are placed randomly within census cells.
138
    The Number of buildings is derived from n_amenity_inside, the selected
139
    method and number of amenities per building.
140
141
    Returns
142
    -------
143
    df: gpd.GeoDataFrame
144
        Table of buildings centroids
145
    """
146
    if isinstance(max_amenities, int):
147
        # amount of amenities is randomly generated within bounds
148
        # (max_amenities, amenities per cell)
149
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
150
            random_ints_until_sum, args=[max_amenities]
151
        )
152
    if isinstance(amenities, int):
153
        # Specific amount of amenities per building
154
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
155
            specific_int_until_sum, args=[amenities]
156
        )
157
158
    # Unnest each building
159
    df = df.explode(column="n_amenities_inside")
160
161
    # building count per cell
162
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
163
164
    # generate random synthetic buildings
165
    edge_length = 5
166
    # create random points within census cells
167
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
168
169
    df.reset_index(drop=True, inplace=True)
170
    # Store center of polygon
171
    df["geom_point"] = points
172
    # Drop geometry of census cell
173
    df = df.drop(columns=["geom"])
174
175
    return df
176
177
178
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
179
    """
180
    Synthetic buildings are generated around points.
181
182
    Parameters
183
    ----------
184
    df: pd.DataFrame
185
        Table of census cells
186
    points: gpd.GeoSeries or str
187
        List of points to place buildings around or column name of df
188
    crs: str
189
        CRS of result table
190
191
    Returns
192
    -------
193
    df: gpd.GeoDataFrame
194
        Synthetic buildings
195
    """
196
197
    if isinstance(points, str) and points in df.columns:
198
        points = df[points]
199
    elif isinstance(points, gpd.GeoSeries):
200
        pass
201
    else:
202
        raise ValueError("Points are of the wrong type")
203
204
    # Create building using a square around point
205
    edge_length = 5
206
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
207
208
    if "geom_point" not in df.columns:
209
        df["geom_point"] = df["geom_building"].centroid
210
211
    # TODO Check CRS
212
    df = gpd.GeoDataFrame(
213
        df,
214
        crs=crs,
215
        geometry="geom_building",
216
    )
217
218
    # TODO remove after implementation of egon_building_id
219
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
220
221
    # get max number of building ids from synthetic residential table
222
    with db.session_scope() as session:
223
        max_synth_residential_id = session.execute(
224
            func.max(OsmBuildingsSynthetic.id)
225
        ).scalar()
226
    max_synth_residential_id = int(max_synth_residential_id)
227
228
    # create sequential ids
229
    df["egon_building_id"] = range(
230
        max_synth_residential_id + 1,
231
        max_synth_residential_id + df.shape[0] + 1,
232
    )
233
234
    df["area"] = df["geom_building"].area
235
    # set building type of synthetic building
236
    df["building"] = "cts"
237
    # TODO remove in #772
238
    df = df.rename(
239
        columns={
240
            # "zensus_population_id": "cell_id",
241
            "egon_building_id": "id",
242
        }
243
    )
244
    return df
245
246
247
def buildings_with_amenities():
248
    """
249
    Amenities which are assigned to buildings are determined
250
    and grouped per building and zensus cell. Buildings
251
    covering multiple cells therefore exists multiple times
252
    but in different zensus cells. This is necessary to cover
253
    all cells with a cts demand. If buildings exist in multiple
254
    substations, their amenities are summed and assigned and kept in
255
    one substation only. If as a result, a census cell is uncovered,
256
    a synthetic amenity is placed. The buildings are aggregated
257
    afterwards during the calculation of the profile_share.
258
259
    Returns
260
    -------
261
    df_buildings_with_amenities: gpd.GeoDataFrame
262
        Contains all buildings with amenities per zensus cell.
263
    df_lost_cells: gpd.GeoDataFrame
264
        Contains synthetic amenities in lost cells. Might be empty
265
    """
266
267
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
268
269
    with db.session_scope() as session:
270
        cells_query = (
271
            session.query(
272
                osm_amenities_in_buildings_filtered,
273
                MapZensusGridDistricts.bus_id,
274
            )
275
            .filter(
276
                MapZensusGridDistricts.zensus_population_id
277
                == osm_amenities_in_buildings_filtered.zensus_population_id
278
            )
279
            .filter(
280
                EgonDemandRegioZensusElectricity.zensus_population_id
281
                == osm_amenities_in_buildings_filtered.zensus_population_id
282
            )
283
            .filter(
284
                EgonDemandRegioZensusElectricity.sector == "service",
285
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
286
            )
287
        )
288
    df_amenities_in_buildings = pd.read_sql(
289
        cells_query.statement, cells_query.session.bind, index_col=None
290
    )
291
292
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
293
        "geom_building"
294
    ].apply(to_shape)
295
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
296
        "geom_amenity"
297
    ].apply(to_shape)
298
299
    df_amenities_in_buildings["n_amenities_inside"] = 1
300
301
    # add identifier column for buildings in multiple substations
302
    df_amenities_in_buildings[
303
        "duplicate_identifier"
304
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
305
        "n_amenities_inside"
306
    ].transform(
307
        "cumsum"
308
    )
309
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
310
        ["id", "duplicate_identifier"]
311
    )
312
    # sum amenities of buildings with multiple substations
313
    df_amenities_in_buildings[
314
        "n_amenities_inside"
315
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
316
        "n_amenities_inside"
317
    ].transform(
318
        "sum"
319
    )
320
    # identify lost zensus cells
321
    df_lost_cells = df_amenities_in_buildings.loc[
322
        df_amenities_in_buildings.duplicated(
323
            subset=["id", "duplicate_identifier"], keep="first"
324
        )
325
    ]["zensus_population_id"]
326
327
    # check if lost zensus cells are already covered
328
    if (
329
        df_amenities_in_buildings["zensus_population_id"]
330
        .isin(df_lost_cells)
331
        .any()
332
    ):
333
        # query geom data for cell if not
334
        with db.session_scope() as session:
335
            cells_query = session.query(
336
                DestatisZensusPopulationPerHa.id,
337
                DestatisZensusPopulationPerHa.geom,
338
            ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
339
340
        df_lost_cells = gpd.read_postgis(
341
            cells_query.statement,
342
            cells_query.session.bind,
343
            geom_col="geom",
344
        )
345
        # TODO maybe adapt method
346
        # place random amenity in cell
347
        df_lost_cells["n_amenities_inside"] = 1
348
        df_lost_cells = place_buildings_with_amenities(
349
            df_lost_cells, amenities=1
350
        )
351
        df_lost_cells.rename(
352
            columns={
353
                "id": "zensus_population_id",
354
                "geom_point": "geom_amenity",
355
            },
356
            inplace=True,
357
        )
358
        df_lost_cells.drop(
359
            columns=["building_count", "n_amenities_inside"], inplace=True
360
        )
361
362
    # drop duplicated buildings
363
    df_amenities_in_buildings.drop_duplicates(
364
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
365
    )
366
    # drop helper columns
367
    df_amenities_in_buildings.drop(
368
        columns=["duplicate_identifier"], inplace=True
369
    )
370
371
    # sum amenities per building and cell
372
    df_amenities_in_buildings[
373
        "n_amenities_inside"
374
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
375
        "n_amenities_inside"
376
    ].transform(
377
        "sum"
378
    )
379
    # drop duplicated buildings
380
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
381
        ["id", "zensus_population_id"]
382
    )
383
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
384
385
    df_buildings_with_amenities = df_buildings_with_amenities[
386
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
387
    ]
388
    df_buildings_with_amenities.rename(
389
        columns={
390
            # "zensus_population_id": "cell_id",
391
            "egon_building_id": "id"
392
        },
393
        inplace=True,
394
    )
395
396
    return df_buildings_with_amenities, df_lost_cells
397
398
399
# TODO Remove as depricated
400
def write_synthetic_buildings_to_db(df_synthetic_buildings):
401
    """"""
402
    if "geom_point" not in df_synthetic_buildings.columns:
403
        df_synthetic_buildings["geom_point"] = df_synthetic_buildings[
404
            "geom_building"
405
        ].centroid
406
407
    df_synthetic_buildings = df_synthetic_buildings.rename(
408
        columns={
409
            "zensus_population_id": "cell_id",
410
            "egon_building_id": "id",
411
        }
412
    )
413
    # Only take existing columns
414
    columns = [
415
        column.key for column in OsmBuildingsSynthetic.__table__.columns
416
    ]
417
    df_synthetic_buildings = df_synthetic_buildings.loc[:, columns]
418
419
    dtypes = {
420
        i: OsmBuildingsSynthetic.__table__.columns[i].type
421
        for i in OsmBuildingsSynthetic.__table__.columns.keys()
422
    }
423
424
    # Write new buildings incl coord into db
425
    df_synthetic_buildings.to_postgis(
426
        name=OsmBuildingsSynthetic.__tablename__,
427
        con=engine,
428
        if_exists="append",
429
        schema=OsmBuildingsSynthetic.__table_args__["schema"],
430
        dtype=dtypes,
431
    )
432
433
434
def buildings_without_amenities():
435
    """
436
    Buildings (filtered and synthetic) in cells with
437
    cts demand but no amenities are determined.
438
439
    Returns
440
    -------
441
    df_buildings_without_amenities: gpd.GeoDataFrame
442
        Table of buildings without amenities in zensus cells
443
        with cts demand.
444
    """
445
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
446
    from saio.openstreetmap import (
447
        osm_amenities_shops_filtered,
448
        osm_buildings_filtered,
449
        osm_buildings_synthetic,
450
    )
451
452
    # buildings_filtered in cts-demand-cells without amenities
453
    with db.session_scope() as session:
454
455
        # Synthetic Buildings
456
        q_synth_buildings = session.query(
457
            osm_buildings_synthetic.cell_id.cast(Integer).label(
458
                "zensus_population_id"
459
            ),
460
            osm_buildings_synthetic.id.cast(Integer).label("id"),
461
            osm_buildings_synthetic.area.label("area"),
462
            osm_buildings_synthetic.geom_building.label("geom_building"),
463
            osm_buildings_synthetic.geom_point.label("geom_point"),
464
        )
465
466
        # Buildings filtered
467
        q_buildings_filtered = session.query(
468
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
469
            osm_buildings_filtered.id,
470
            osm_buildings_filtered.area,
471
            osm_buildings_filtered.geom_building,
472
            osm_buildings_filtered.geom_point,
473
        ).filter(
474
            osm_buildings_filtered.id
475
            == egon_map_zensus_buildings_filtered_all.id
476
        )
477
478
        # Amenities + zensus_population_id
479
        q_amenities = (
480
            session.query(
481
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
482
            )
483
            .filter(
484
                func.st_within(
485
                    osm_amenities_shops_filtered.geom_amenity,
486
                    DestatisZensusPopulationPerHa.geom,
487
                )
488
            )
489
            .distinct(DestatisZensusPopulationPerHa.id)
490
        )
491
492
        # Cells with CTS demand but without amenities
493
        q_cts_without_amenities = (
494
            session.query(
495
                EgonDemandRegioZensusElectricity.zensus_population_id,
496
            )
497
            .filter(
498
                EgonDemandRegioZensusElectricity.sector == "service",
499
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
500
            )
501
            .filter(
502
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
503
                    q_amenities
504
                )
505
            )
506
            .distinct()
507
        )
508
509
        # Buildings filtered + synthetic buildings residential in
510
        # cells with CTS demand but without amenities
511
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
512
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
513
                q_cts_without_amenities
514
            )
515
        )
516
517
    # df_buildings_without_amenities = pd.read_sql(
518
    #     cells_query.statement, cells_query.session.bind, index_col=None)
519
    df_buildings_without_amenities = gpd.read_postgis(
520
        cells_query.statement,
521
        cells_query.session.bind,
522
        geom_col="geom_building",
523
    )
524
525
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
526
        columns={
527
            # "zensus_population_id": "cell_id",
528
            "egon_building_id": "id",
529
        }
530
    )
531
532
    return df_buildings_without_amenities
533
534
535
def select_cts_buildings(df_buildings_wo_amenities):
536
    """
537
    Buildings (filtered and synthetic) in cells with
538
    cts demand are selected. Only the first building
539
    is taken for each cell and 1 amenities is assigned.
540
541
    Returns
542
    -------
543
    df_buildings_with_cts_demand: gpd.GeoDataFrame
544
        Table of buildings
545
    """
546
    # TODO Adapt method
547
    # Select one building each cell
548
    # take the first
549
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
550
        # subset="cell_id", keep="first"
551
        subset="zensus_population_id",
552
        keep="first",
553
    ).reset_index(drop=True)
554
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
555
    df_buildings_with_cts_demand["building"] = "cts"
556
557
    return df_buildings_with_cts_demand
558
559
560
def cells_with_cts_demand_only(df_buildings_without_amenities):
561
    """
562
    Cells with cts demand but no amenities or buildilngs
563
    are determined.
564
565
    Returns
566
    -------
567
    df_cells_only_cts_demand: gpd.GeoDataFrame
568
        Table of cells with cts demand but no amenities or buildings
569
    """
570
    from saio.openstreetmap import osm_amenities_shops_filtered
571
572
    # cells mit amenities
573
    with db.session_scope() as session:
574
        sub_query = (
575
            session.query(
576
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
577
            )
578
            .filter(
579
                func.st_within(
580
                    osm_amenities_shops_filtered.geom_amenity,
581
                    DestatisZensusPopulationPerHa.geom,
582
                )
583
            )
584
            .distinct(DestatisZensusPopulationPerHa.id)
585
        )
586
587
        cells_query = (
588
            session.query(
589
                EgonDemandRegioZensusElectricity.zensus_population_id,
590
                EgonDemandRegioZensusElectricity.scenario,
591
                EgonDemandRegioZensusElectricity.sector,
592
                EgonDemandRegioZensusElectricity.demand,
593
                DestatisZensusPopulationPerHa.geom,
594
            )
595
            .filter(
596
                EgonDemandRegioZensusElectricity.sector == "service",
597
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
598
            )
599
            .filter(
600
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
601
                    sub_query
602
                )
603
            )
604
            .filter(
605
                EgonDemandRegioZensusElectricity.zensus_population_id
606
                == DestatisZensusPopulationPerHa.id
607
            )
608
        )
609
610
    df_cts_cell_without_amenities = gpd.read_postgis(
611
        cells_query.statement,
612
        cells_query.session.bind,
613
        geom_col="geom",
614
        index_col=None,
615
    )
616
617
    # TODO maybe remove
618
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
619
        columns={"cell_id": "zensus_population_id"}
620
    )
621
622
    # Census cells with only cts demand
623
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
624
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
625
            df_buildings_without_amenities["zensus_population_id"].unique()
626
        )
627
    ]
628
629
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
630
631
    return df_cells_only_cts_demand
632
633
634
def calc_census_cell_share(scenario="eGon2035"):
635
    """
636
    The profile share for each census cell is calculated by it's
637
    share of annual demand per substation bus. The annual demand
638
    per cell is defined by DemandRegio. The share is for both
639
    scenarios identical as the annual demand is linearly scaled.
640
641
    Parameters
642
    ----------
643
    scenario: str
644
        Scenario for which the share is calculated.
645
646
    Returns
647
    -------
648
    df_census_share: pd.DataFrame
649
    """
650
651
    with db.session_scope() as session:
652
        cells_query = (
653
            session.query(
654
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
655
            )
656
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
657
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
658
            .filter(
659
                EgonDemandRegioZensusElectricity.zensus_population_id
660
                == MapZensusGridDistricts.zensus_population_id
661
            )
662
        )
663
664
    df_demand_regio_electricity_demand = pd.read_sql(
665
        cells_query.statement,
666
        cells_query.session.bind,
667
        index_col="zensus_population_id",
668
    )
669
670
    # get demand share of cell per bus
671
    df_census_share = df_demand_regio_electricity_demand[
672
        "demand"
673
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
674
        "demand"
675
    ].transform(
676
        "sum"
677
    )
678
    df_census_share = df_census_share.rename("cell_share")
679
680
    df_census_share = pd.concat(
681
        [
682
            df_census_share,
683
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
684
        ],
685
        axis=1,
686
    )
687
688
    df_census_share.reset_index(inplace=True)
689
    return df_census_share
690
691
692
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
693
    """
694
    Share of cts electricity demand profile per bus for every selected building
695
    is calculated. Building-amenity share is multiplied with census cell share
696
    to get the substation bus profile share for each building. The share is
697
    grouped and aggregated per building as some cover multiple cells.
698
699
    Parameters
700
    ----------
701
    df_cts_buildings: gpd.GeoDataFrame
702
        Table of all buildings with cts demand assigned
703
    scenario: str
704
        Scenario for which the share is calculated.
705
706
    Returns
707
    -------
708
    df_building_share: pd.DataFrame
709
        Table of bus profile share per building
710
711
    """
712
713
    def calc_building_amenity_share(df_cts_buildings):
714
        """
715
        Calculate the building share by the number amenities per building
716
        within a census cell.
717
        """
718
        df_building_amenity_share = df_cts_buildings[
719
            "n_amenities_inside"
720
        ] / df_cts_buildings.groupby("zensus_population_id")[
721
            "n_amenities_inside"
722
        ].transform(
723
            "sum"
724
        )
725
        df_building_amenity_share = pd.concat(
726
            [
727
                df_building_amenity_share.rename("building_amenity_share"),
728
                df_cts_buildings[["zensus_population_id", "id"]],
729
            ],
730
            axis=1,
731
        )
732
        return df_building_amenity_share
733
734
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
735
736
    df_census_cell_share = calc_census_cell_share(scenario)
737
738
    df_demand_share = pd.merge(
739
        left=df_building_amenity_share,
740
        right=df_census_cell_share,
741
        left_on="zensus_population_id",
742
        right_on="zensus_population_id",
743
    )
744
    df_demand_share["profile_share"] = df_demand_share[
745
        "building_amenity_share"
746
    ].multiply(df_demand_share["cell_share"])
747
748
    df_demand_share = df_demand_share[
749
        ["id", "bus_id", "scenario", "profile_share"]
750
    ]
751
    # Group and aggregate per building for multi cell buildings
752
    df_demand_share = (
753
        df_demand_share.groupby(["scenario", "bus_id", "id"])
754
        .sum()
755
        .reset_index()
756
    )
757
758
    return df_demand_share
759
760
761
def calc_building_profiles(
762
    df_demand_share=None,
763
    egon_building_id=None,
764
    bus_id=None,
765
    scenario="eGon2035",
766
):
767
    """
768
    Calculate the demand profile for each building. The profile is
769
    calculated by the demand share of the building per substation bus.
770
771
    Parameters
772
    ----------
773
    df_demand_share: pd.DataFrame
774
        Table of demand share per building. If not given, table is
775
        sourced from database.
776
    egon_building_id: int
777
        Id of the building for which the profile is calculated. If not
778
        given, the profiles are calculated for all buildings.
779
780
    Returns
781
    -------
782
    df_building_profiles: pd.DataFrame
783
        Table of demand profile per building
784
    """
785
786
    if not isinstance(df_demand_share, pd.DataFrame):
787
        with db.session_scope() as session:
788
            cells_query = session.query(EgonCtsElectricityDemandBuildingShare)
789
790
        df_demand_share = pd.read_sql(
791
            cells_query.statement, cells_query.session.bind, index_col=None
792
        )
793
794
    df_cts_profiles = calc_load_curves_cts(scenario)
795
796
    # get demand share of selected building id
797
    if isinstance(egon_building_id, int):
798
        if egon_building_id in df_demand_share["id"]:
799
            df_demand_share = df_demand_share.loc[
800
                df_demand_share["id"] == egon_building_id
801
            ]
802
        else:
803
            raise KeyError(f"Building with id {egon_building_id} not found")
804
805
    # get demand share of all buildings for selected bus id
806
    if isinstance(bus_id, int):
807
        if bus_id in df_demand_share["bus_id"]:
808
            df_demand_share = df_demand_share.loc[
809
                df_demand_share["bus_id"] == bus_id
810
            ]
811
        else:
812
            raise KeyError(f"Bus with id {bus_id} not found")
813
814
    # get demand profile for all buildings for selected demand share
815
    df_building_profiles = pd.DataFrame()
816
    for bus_id, df in df_demand_share.groupby("bus_id"):
817
        shares = df.set_index("id", drop=True)["profile_share"]
818
        profile = df_cts_profiles.loc[:, bus_id]
819
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 816 is not entered. Are you sure this can never be the case?
Loading history...
820
        df_building_profiles = pd.concat(
821
            [df_building_profiles, building_profiles], axis=1
822
        )
823
824
    return df_building_profiles
825
826
827
def cts_to_buildings():
828
    """
829
    Assigns CTS demand to buildings and calculates the respective demand
830
    profiles. The demand profile per substation are disaggregated per
831
    annual demand share of each census cell and by the number of amenities
832
    per building within the cell. If no building data is available,
833
    synthetic buildings are generated around the amenities. If no amenities
834
    but cts demand is available, buildings are randomly selected. If no
835
    building nor amenity is available, random synthetic buildings are
836
    generated. The demand share is stored in the database.
837
838
    Note:
839
    -----
840
    Cells with CTS demand, amenities and buildings do not change within
841
    the scenarios, only the demand itself. Therefore scenario eGon2035
842
    can be used universally to determine the cts buildings but not for
843
    he demand share.
844
    """
845
846
    # Buildings with amenities
847
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
848
849
    # Remove synthetic CTS buildings if existing
850
    delete_synthetic_cts_buildings()
851
852
    # Create synthetic buildings for amenites without buildings
853
    df_amenities_without_buildings = amenities_without_buildings()
854
    if not df_lost_cells.empty():
855
        df_lost_cells = place_buildings_with_amenities(
856
            df_lost_cells, amenities=1
857
        )
858
        df_lost_cells.rename(
859
            columns={
860
                "id": "zensus_population_id",
861
                "geom_point": "geom_amenity",
862
            },
863
            inplace=True,
864
        )
865
        df_lost_cells.drop(columns="building_count", inplace=True)
866
        df_amenities_without_buildings = df_amenities_without_buildings.append(
867
            df_lost_cells, ignore_index=True
868
        )
869
870
    df_amenities_without_buildings["n_amenities_inside"] = 1
871
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
872
        df_amenities_without_buildings, points="geom_amenity"
873
    )
874
875
    # TODO write to DB and remove renaming
876
    # write_synthetic_buildings_to_db(df_synthetic_buildings_with_amenities)
877
    write_table_to_postgis(
878
        df_synthetic_buildings_with_amenities.rename(
879
            columns={
880
                "zensus_population_id": "cell_id",
881
                "egon_building_id": "id",
882
            }
883
        ),
884
        OsmBuildingsSynthetic,
885
        drop=False,
886
    )
887
888
    # Cells without amenities but CTS demand and buildings
889
    df_buildings_without_amenities = buildings_without_amenities()
890
891
    # TODO Fix Adhoc Bugfix duplicated buildings
892
    mask = df_buildings_without_amenities.loc[
893
        df_buildings_without_amenities["id"].isin(
894
            df_buildings_with_amenities["id"]
895
        )
896
    ].index
897
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
898
        index=mask
899
    ).reset_index(drop=True)
900
901
    df_buildings_without_amenities = select_cts_buildings(
902
        df_buildings_without_amenities
903
    )
904
    df_buildings_without_amenities["n_amenities_inside"] = 1
905
906
    # Create synthetic amenities and buildings in cells with only CTS demand
907
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
908
        df_buildings_without_amenities
909
    )
910
    # Only 1 Amenity per cell
911
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
912
    # Only 1 Amenity per Building
913
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
914
        df_cells_with_cts_demand_only, amenities=1
915
    )
916
    # Leads to only 1 building per cell
917
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
918
        df_cells_with_cts_demand_only, points="geom_point"
919
    )
920
921
    # TODO write to DB and remove renaming
922
    # write_synthetic_buildings_to_db(df_synthetic_buildings_without_amenities)
923
    write_table_to_postgis(
924
        df_synthetic_buildings_without_amenities.rename(
925
            columns={
926
                "zensus_population_id": "cell_id",
927
                "egon_building_id": "id",
928
            }
929
        ),
930
        OsmBuildingsSynthetic,
931
        drop=False,
932
    )
933
934
    # Concat all buildings
935
    columns = [
936
        "zensus_population_id",
937
        "id",
938
        "geom_building",
939
        "n_amenities_inside",
940
        "source",
941
    ]
942
943
    df_buildings_with_amenities["source"] = "bwa"
944
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
945
    df_buildings_without_amenities["source"] = "bwoa"
946
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
947
948
    df_cts_buildings = pd.concat(
949
        [
950
            df_buildings_with_amenities[columns],
951
            df_synthetic_buildings_with_amenities[columns],
952
            df_buildings_without_amenities[columns],
953
            df_synthetic_buildings_without_amenities[columns],
954
        ],
955
        axis=0,
956
        ignore_index=True,
957
    )
958
    # TODO maybe remove after #772
959
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
960
961
    # Write table to db for debugging
962
    # TODO remove later
963
    df_cts_buildings = gpd.GeoDataFrame(
964
        df_cts_buildings, geometry="geom_building", crs=3035
965
    )
966
    df_cts_buildings = df_cts_buildings.reset_index().rename(
967
        columns={"index": "serial"}
968
    )
969
    write_table_to_postgis(
970
        df_cts_buildings,
971
        CtsBuildings,
972
        drop=True,
973
    )
974
975
    df_demand_share_2035 = calc_building_demand_profile_share(
976
        df_cts_buildings, scenario="eGon2035"
977
    )
978
    df_demand_share_100RE = calc_building_demand_profile_share(
979
        df_cts_buildings, scenario="eGon100RE"
980
    )
981
982
    df_demand_share = pd.concat(
983
        [df_demand_share_2035, df_demand_share_100RE],
984
        axis=0,
985
        ignore_index=True,
986
    )
987
988
    write_table_to_postgres(
989
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
990
    )
991
992
    return df_cts_buildings, df_demand_share
993
994
995
def get_peak_load_cts_buildings():
996
    """
997
    Get peak load of all CTS buildings for both scenarios and store in DB.
998
    """
999
1000
    # TODO Check units, maybe MwH?
1001
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
1002
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename(
1003
        "cts_peak_load_in_w_2035"
1004
    )
1005
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
1006
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename(
1007
        "cts_peak_load_in_w_100RE"
1008
    )
1009
    df_peak_load = pd.concat(
1010
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1011
    ).reset_index()
1012
1013
    CtsPeakLoads.__table__.drop(bind=engine, checkfirst=True)
1014
    CtsPeakLoads.__table__.create(bind=engine, checkfirst=True)
1015
1016
    # Write peak loads into db
1017
    with db.session_scope() as session:
1018
        session.bulk_insert_mappings(
1019
            CtsPeakLoads,
1020
            df_peak_load.to_dict(orient="records"),
1021
        )
1022
1023
1024
def delete_synthetic_cts_buildings():
1025
    """
1026
    All synthetic cts buildings are deleted from the DB. This is necessary if
1027
    the task is run multiple times as the existing synthetic buildings
1028
    influence the results.
1029
    """
1030
    # import db tables
1031
    from saio.openstreetmap import osm_buildings_synthetic
1032
1033
    # cells mit amenities
1034
    with db.session_scope() as session:
1035
        session.query(osm_buildings_synthetic).filter(
1036
            osm_buildings_synthetic.building == "cts"
1037
        ).delete()
1038
1039
1040
class CtsElectricityBuildings(Dataset):
1041
    def __init__(self, dependencies):
1042
        super().__init__(
1043
            name="CtsElectricityBuildings",
1044
            version="0.0.0.",
1045
            dependencies=dependencies,
1046
            tasks=(
1047
                cts_to_buildings,
1048
                get_peak_load_cts_buildings,
1049
                # get_all_cts_building_profiles,
1050
            ),
1051
        )
1052