Passed
Pull Request — dev (#826)
by
unknown
01:43
created

data.datasets.electricity_demand_timeseries.cts_buildings   A

Complexity

Total Complexity 41

Size/Duplication

Total Lines 1046
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 41
eloc 533
dl 0
loc 1046
rs 9.1199
c 0
b 0
f 0

14 Functions

Rating   Name   Duplication   Size   Complexity  
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A amenities_without_buildings() 0 60 2
A delete_synthetic_cts_buildings() 0 14 2
A get_peak_load_cts_buildings() 0 26 2
C calc_building_profiles() 0 64 9
A select_cts_buildings() 0 23 1
B cells_with_cts_demand_only() 0 72 2
A write_synthetic_buildings_to_db() 0 31 2
B cts_to_buildings() 0 166 2
A calc_building_demand_profile_share() 0 67 1
B buildings_with_amenities() 0 147 4
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix   Complexity   

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    OsmBuildingsSynthetic,
17
)
18
from egon.data.datasets.electricity_demand_timeseries.tools import (
19
    random_ints_until_sum,
20
    random_point_in_square,
21
    specific_int_until_sum,
22
    write_table_to_postgis,
23
    write_table_to_postgres,
24
)
25
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
26
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
27
import egon.data.config
28
29
engine = db.engine()
30
Base = declarative_base()
31
32
data_config = egon.data.config.datasets()
33
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
34
35
# import db tables
36
saio.register_schema("openstreetmap", engine=engine)
37
saio.register_schema("society", engine=engine)
38
saio.register_schema("demand", engine=engine)
39
saio.register_schema("boundaries", engine=engine)
40
41
42
class EgonCtsElectricityDemandBuildingShare(Base):
43
    __tablename__ = "egon_cts_electricity_demand_building_share"
44
    __table_args__ = {"schema": "demand"}
45
46
    id = Column(Integer, primary_key=True)
47
    scenario = Column(String, primary_key=True)
48
    bus_id = Column(Integer, index=True)
49
    profile_share = Column(Float)
50
51
52
class CtsPeakLoads(Base):
53
    __tablename__ = "egon_cts_peak_loads"
54
    __table_args__ = {"schema": "demand"}
55
56
    id = Column(String, primary_key=True)
57
    cts_peak_load_in_w_2035 = Column(REAL)
58
    cts_peak_load_in_w_100RE = Column(REAL)
59
60
61
class CtsBuildings(Base):
62
    __tablename__ = "egon_cts_buildings"
63
    __table_args__ = {"schema": "openstreetmap"}
64
65
    serial = Column(Integer, primary_key=True)
66
    id = Column(Integer)
67
    zensus_population_id = Column(Integer, index=True)
68
    geom_building = Column(Geometry("Polygon", 3035))
69
    n_amenities_inside = Column(Integer)
70
    source = Column(String)
71
72
73
def amenities_without_buildings():
74
    """
75
    Amenities which have no buildings assigned and are in
76
    a cell with cts demand are determined.
77
78
    Returns
79
    -------
80
    pd.DataFrame
81
        Table of amenities without buildings
82
    """
83
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
84
85
    with db.session_scope() as session:
86
        cells_query = (
87
            session.query(
88
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
89
                # TODO can be used for square around amenity
90
                #  (1 geom_amenity: 1 geom_building)
91
                #  not unique amenity_ids yet
92
                osm_amenities_not_in_buildings_filtered.geom_amenity,
93
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
94
                # EgonDemandRegioZensusElectricity.demand,
95
                # # TODO can be used to generate n random buildings
96
                # # (n amenities : 1 randombuilding)
97
                # func.count(
98
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
99
                # ).label("n_amenities_inside"),
100
                # DestatisZensusPopulationPerHa.geom,
101
            )
102
            .filter(
103
                func.st_within(
104
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
105
                    DestatisZensusPopulationPerHa.geom,
106
                )
107
            )
108
            .filter(
109
                DestatisZensusPopulationPerHa.id
110
                == EgonDemandRegioZensusElectricity.zensus_population_id
111
            )
112
            .filter(
113
                EgonDemandRegioZensusElectricity.sector == "service",
114
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
115
                #         ).group_by(
116
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
117
                #             DestatisZensusPopulationPerHa.geom,
118
            )
119
        )
120
    # # TODO can be used to generate n random buildings
121
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
122
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
123
    # )
124
    #
125
126
    # # TODO can be used for square around amenity
127
    df_amenities_without_buildings = gpd.read_postgis(
128
        cells_query.statement,
129
        cells_query.session.bind,
130
        geom_col="geom_amenity",
131
    )
132
    return df_amenities_without_buildings
133
134
135
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
136
    """
137
    Building centroids are placed randomly within census cells.
138
    The Number of buildings is derived from n_amenity_inside, the selected
139
    method and number of amenities per building.
140
141
    Returns
142
    -------
143
    df: gpd.GeoDataFrame
144
        Table of buildings centroids
145
    """
146
    if isinstance(max_amenities, int):
147
        # amount of amenities is randomly generated within bounds
148
        # (max_amenities, amenities per cell)
149
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
150
            random_ints_until_sum, args=[max_amenities]
151
        )
152
    if isinstance(amenities, int):
153
        # Specific amount of amenities per building
154
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
155
            specific_int_until_sum, args=[amenities]
156
        )
157
158
    # Unnest each building
159
    df = df.explode(column="n_amenities_inside")
160
161
    # building count per cell
162
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
163
164
    # generate random synthetic buildings
165
    edge_length = 5
166
    # create random points within census cells
167
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
168
169
    df.reset_index(drop=True, inplace=True)
170
    # Store center of polygon
171
    df["geom_point"] = points
172
    # Drop geometry of census cell
173
    df = df.drop(columns=["geom"])
174
175
    return df
176
177
178
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
179
    """
180
    Synthetic buildings are generated around points.
181
182
    Parameters
183
    ----------
184
    df: pd.DataFrame
185
        Table of census cells
186
    points: gpd.GeoSeries or str
187
        List of points to place buildings around or column name of df
188
    crs: str
189
        CRS of result table
190
191
    Returns
192
    -------
193
    df: gpd.GeoDataFrame
194
        Synthetic buildings
195
    """
196
197
    if isinstance(points, str) and points in df.columns:
198
        points = df[points]
199
    elif isinstance(points, gpd.GeoSeries):
200
        pass
201
    else:
202
        raise ValueError("Points are of the wrong type")
203
204
    # Create building using a square around point
205
    edge_length = 5
206
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
207
208
    if "geom_point" not in df.columns:
209
        df["geom_point"] = df["geom_building"].centroid
210
211
    # TODO Check CRS
212
    df = gpd.GeoDataFrame(
213
        df,
214
        crs=crs,
215
        geometry="geom_building",
216
    )
217
218
    # TODO remove after implementation of egon_building_id
219
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
220
221
    # get max number of building ids from synthetic residential table
222
    with db.session_scope() as session:
223
        max_synth_residential_id = session.execute(
224
            func.max(OsmBuildingsSynthetic.id)
225
        ).scalar()
226
    max_synth_residential_id = int(max_synth_residential_id)
227
228
    # create sequential ids
229
    df["egon_building_id"] = range(
230
        max_synth_residential_id + 1,
231
        max_synth_residential_id + df.shape[0] + 1,
232
    )
233
234
    df["area"] = df["geom_building"].area
235
    # set building type of synthetic building
236
    df["building"] = "cts"
237
    # TODO remove in #772
238
    df = df.rename(
239
        columns={
240
            # "zensus_population_id": "cell_id",
241
            "egon_building_id": "id",
242
        }
243
    )
244
    return df
245
246
247
def buildings_with_amenities():
248
    """
249
    Amenities which are assigned to buildings are determined
250
    and grouped per building and zensus cell. Buildings
251
    covering multiple cells therefore exists multiple times
252
    but in different zensus cells. This is necessary to cover
253
    all cells with a cts demand. If buildings exist in multiple
254
    substations, their amenities are summed and assigned and kept in
255
    one substation only. If as a result, a census cell is uncovered,
256
    a synthetic amenity is placed. The buildings are aggregated
257
    afterwards during the calculation of the profile_share.
258
259
    Returns
260
    -------
261
    df_buildings_with_amenities: gpd.GeoDataFrame
262
        Contains all buildings with amenities per zensus cell.
263
    df_lost_cells: gpd.GeoDataFrame
264
        Contains synthetic amenities in lost cells. Might be empty
265
    """
266
267
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
268
269
    with db.session_scope() as session:
270
        cells_query = (
271
            session.query(
272
                osm_amenities_in_buildings_filtered,
273
                MapZensusGridDistricts.bus_id,
274
            )
275
            .filter(
276
                MapZensusGridDistricts.zensus_population_id
277
                == osm_amenities_in_buildings_filtered.zensus_population_id
278
            )
279
            .filter(
280
                EgonDemandRegioZensusElectricity.zensus_population_id
281
                == osm_amenities_in_buildings_filtered.zensus_population_id
282
            )
283
            .filter(
284
                EgonDemandRegioZensusElectricity.sector == "service",
285
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
286
            )
287
        )
288
    df_amenities_in_buildings = pd.read_sql(
289
        cells_query.statement, cells_query.session.bind, index_col=None
290
    )
291
292
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
293
        "geom_building"
294
    ].apply(to_shape)
295
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
296
        "geom_amenity"
297
    ].apply(to_shape)
298
299
    df_amenities_in_buildings["n_amenities_inside"] = 1
300
301
    # add identifier column for buildings in multiple substations
302
    df_amenities_in_buildings[
303
        "duplicate_identifier"
304
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
305
        "n_amenities_inside"
306
    ].transform(
307
        "cumsum"
308
    )
309
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
310
        ["id", "duplicate_identifier"]
311
    )
312
    # sum amenities of buildings with multiple substations
313
    df_amenities_in_buildings[
314
        "n_amenities_inside"
315
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
316
        "n_amenities_inside"
317
    ].transform(
318
        "sum"
319
    )
320
    # identify lost zensus cells
321
    df_lost_cells = df_amenities_in_buildings.loc[
322
        df_amenities_in_buildings.duplicated(
323
            subset=["id", "duplicate_identifier"], keep="first"
324
        )
325
    ]["zensus_population_id"]
326
327
    # check if lost zensus cells are already covered
328
    if (
329
        df_amenities_in_buildings["zensus_population_id"]
330
        .isin(df_lost_cells)
331
        .any()
332
    ):
333
        # query geom data for cell if not
334
        with db.session_scope() as session:
335
            cells_query = session.query(
336
                DestatisZensusPopulationPerHa.id,
337
                DestatisZensusPopulationPerHa.geom,
338
            ).filter(DestatisZensusPopulationPerHa.id.in_(df_lost_cells))
339
340
        df_lost_cells = gpd.read_postgis(
341
            cells_query.statement,
342
            cells_query.session.bind,
343
            geom_col="geom",
344
        )
345
        # TODO maybe adapt method
346
        # place random amenity in cell
347
        df_lost_cells = place_buildings_with_amenities(
348
            df_lost_cells, amenities=1
349
        )
350
        df_lost_cells.rename(
351
            columns={
352
                "id": "zensus_population_id",
353
                "geom_point": "geom_amenity",
354
            },
355
            inplace=True,
356
        )
357
        df_lost_cells.drop(columns="building_count", inplace=True)
358
359
    # drop duplicated buildings
360
    df_amenities_in_buildings.drop_duplicates(
361
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
362
    )
363
    # drop helper columns
364
    df_amenities_in_buildings.drop(
365
        columns=["duplicate_identifier"], inplace=True
366
    )
367
368
    # sum amenities per building and cell
369
    df_amenities_in_buildings[
370
        "n_amenities_inside"
371
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
372
        "n_amenities_inside"
373
    ].transform(
374
        "sum"
375
    )
376
    # drop duplicated buildings
377
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
378
        ["id", "zensus_population_id"]
379
    )
380
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
381
382
    df_buildings_with_amenities = df_buildings_with_amenities[
383
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
384
    ]
385
    df_buildings_with_amenities.rename(
386
        columns={
387
            # "zensus_population_id": "cell_id",
388
            "egon_building_id": "id"
389
        },
390
        inplace=True,
391
    )
392
393
    return df_buildings_with_amenities, df_lost_cells
394
395
396
# TODO Remove as depricated
397
def write_synthetic_buildings_to_db(df_synthetic_buildings):
398
    """"""
399
    if "geom_point" not in df_synthetic_buildings.columns:
400
        df_synthetic_buildings["geom_point"] = df_synthetic_buildings[
401
            "geom_building"
402
        ].centroid
403
404
    df_synthetic_buildings = df_synthetic_buildings.rename(
405
        columns={
406
            "zensus_population_id": "cell_id",
407
            "egon_building_id": "id",
408
        }
409
    )
410
    # Only take existing columns
411
    columns = [
412
        column.key for column in OsmBuildingsSynthetic.__table__.columns
413
    ]
414
    df_synthetic_buildings = df_synthetic_buildings.loc[:, columns]
415
416
    dtypes = {
417
        i: OsmBuildingsSynthetic.__table__.columns[i].type
418
        for i in OsmBuildingsSynthetic.__table__.columns.keys()
419
    }
420
421
    # Write new buildings incl coord into db
422
    df_synthetic_buildings.to_postgis(
423
        name=OsmBuildingsSynthetic.__tablename__,
424
        con=engine,
425
        if_exists="append",
426
        schema=OsmBuildingsSynthetic.__table_args__["schema"],
427
        dtype=dtypes,
428
    )
429
430
431
def buildings_without_amenities():
432
    """
433
    Buildings (filtered and synthetic) in cells with
434
    cts demand but no amenities are determined.
435
436
    Returns
437
    -------
438
    df_buildings_without_amenities: gpd.GeoDataFrame
439
        Table of buildings without amenities in zensus cells
440
        with cts demand.
441
    """
442
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
443
    from saio.openstreetmap import (
444
        osm_amenities_shops_filtered,
445
        osm_buildings_filtered,
446
        osm_buildings_synthetic,
447
    )
448
449
    # buildings_filtered in cts-demand-cells without amenities
450
    with db.session_scope() as session:
451
452
        # Synthetic Buildings
453
        q_synth_buildings = session.query(
454
            osm_buildings_synthetic.cell_id.cast(Integer).label(
455
                "zensus_population_id"
456
            ),
457
            osm_buildings_synthetic.id.cast(Integer).label("id"),
458
            osm_buildings_synthetic.area.label("area"),
459
            osm_buildings_synthetic.geom_building.label("geom_building"),
460
            osm_buildings_synthetic.geom_point.label("geom_point"),
461
        )
462
463
        # Buildings filtered
464
        q_buildings_filtered = session.query(
465
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
466
            osm_buildings_filtered.id,
467
            osm_buildings_filtered.area,
468
            osm_buildings_filtered.geom_building,
469
            osm_buildings_filtered.geom_point,
470
        ).filter(
471
            osm_buildings_filtered.id
472
            == egon_map_zensus_buildings_filtered_all.id
473
        )
474
475
        # Amenities + zensus_population_id
476
        q_amenities = (
477
            session.query(
478
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
479
            )
480
            .filter(
481
                func.st_within(
482
                    osm_amenities_shops_filtered.geom_amenity,
483
                    DestatisZensusPopulationPerHa.geom,
484
                )
485
            )
486
            .distinct(DestatisZensusPopulationPerHa.id)
487
        )
488
489
        # Cells with CTS demand but without amenities
490
        q_cts_without_amenities = (
491
            session.query(
492
                EgonDemandRegioZensusElectricity.zensus_population_id,
493
            )
494
            .filter(
495
                EgonDemandRegioZensusElectricity.sector == "service",
496
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
497
            )
498
            .filter(
499
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
500
                    q_amenities
501
                )
502
            )
503
            .distinct()
504
        )
505
506
        # Buildings filtered + synthetic buildings residential in
507
        # cells with CTS demand but without amenities
508
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
509
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
510
                q_cts_without_amenities
511
            )
512
        )
513
514
    # df_buildings_without_amenities = pd.read_sql(
515
    #     cells_query.statement, cells_query.session.bind, index_col=None)
516
    df_buildings_without_amenities = gpd.read_postgis(
517
        cells_query.statement,
518
        cells_query.session.bind,
519
        geom_col="geom_building",
520
    )
521
522
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
523
        columns={
524
            # "zensus_population_id": "cell_id",
525
            "egon_building_id": "id",
526
        }
527
    )
528
529
    return df_buildings_without_amenities
530
531
532
def select_cts_buildings(df_buildings_wo_amenities):
533
    """
534
    Buildings (filtered and synthetic) in cells with
535
    cts demand are selected. Only the first building
536
    is taken for each cell and 1 amenities is assigned.
537
538
    Returns
539
    -------
540
    df_buildings_with_cts_demand: gpd.GeoDataFrame
541
        Table of buildings
542
    """
543
    # TODO Adapt method
544
    # Select one building each cell
545
    # take the first
546
    df_buildings_with_cts_demand = df_buildings_wo_amenities.drop_duplicates(
547
        # subset="cell_id", keep="first"
548
        subset="zensus_population_id",
549
        keep="first",
550
    ).reset_index(drop=True)
551
    df_buildings_with_cts_demand["n_amenities_inside"] = 1
552
    df_buildings_with_cts_demand["building"] = "cts"
553
554
    return df_buildings_with_cts_demand
555
556
557
def cells_with_cts_demand_only(df_buildings_without_amenities):
558
    """
559
    Cells with cts demand but no amenities or buildilngs
560
    are determined.
561
562
    Returns
563
    -------
564
    df_cells_only_cts_demand: gpd.GeoDataFrame
565
        Table of cells with cts demand but no amenities or buildings
566
    """
567
    from saio.openstreetmap import osm_amenities_shops_filtered
568
569
    # cells mit amenities
570
    with db.session_scope() as session:
571
        sub_query = (
572
            session.query(
573
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
574
            )
575
            .filter(
576
                func.st_within(
577
                    osm_amenities_shops_filtered.geom_amenity,
578
                    DestatisZensusPopulationPerHa.geom,
579
                )
580
            )
581
            .distinct(DestatisZensusPopulationPerHa.id)
582
        )
583
584
        cells_query = (
585
            session.query(
586
                EgonDemandRegioZensusElectricity.zensus_population_id,
587
                EgonDemandRegioZensusElectricity.scenario,
588
                EgonDemandRegioZensusElectricity.sector,
589
                EgonDemandRegioZensusElectricity.demand,
590
                DestatisZensusPopulationPerHa.geom,
591
            )
592
            .filter(
593
                EgonDemandRegioZensusElectricity.sector == "service",
594
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
595
            )
596
            .filter(
597
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
598
                    sub_query
599
                )
600
            )
601
            .filter(
602
                EgonDemandRegioZensusElectricity.zensus_population_id
603
                == DestatisZensusPopulationPerHa.id
604
            )
605
        )
606
607
    df_cts_cell_without_amenities = gpd.read_postgis(
608
        cells_query.statement,
609
        cells_query.session.bind,
610
        geom_col="geom",
611
        index_col=None,
612
    )
613
614
    # TODO maybe remove
615
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
616
        columns={"cell_id": "zensus_population_id"}
617
    )
618
619
    # Census cells with only cts demand
620
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
621
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
622
            df_buildings_without_amenities["zensus_population_id"].unique()
623
        )
624
    ]
625
626
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
627
628
    return df_cells_only_cts_demand
629
630
631
def calc_census_cell_share(scenario="eGon2035"):
632
    """
633
    The profile share for each census cell is calculated by it's
634
    share of annual demand per substation bus. The annual demand
635
    per cell is defined by DemandRegio. The share is for both
636
    scenarios identical as the annual demand is linearly scaled.
637
638
    Parameters
639
    ----------
640
    scenario: str
641
        Scenario for which the share is calculated.
642
643
    Returns
644
    -------
645
    df_census_share: pd.DataFrame
646
    """
647
648
    with db.session_scope() as session:
649
        cells_query = (
650
            session.query(
651
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
652
            )
653
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
654
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
655
            .filter(
656
                EgonDemandRegioZensusElectricity.zensus_population_id
657
                == MapZensusGridDistricts.zensus_population_id
658
            )
659
        )
660
661
    df_demand_regio_electricity_demand = pd.read_sql(
662
        cells_query.statement,
663
        cells_query.session.bind,
664
        index_col="zensus_population_id",
665
    )
666
667
    # get demand share of cell per bus
668
    df_census_share = df_demand_regio_electricity_demand[
669
        "demand"
670
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
671
        "demand"
672
    ].transform(
673
        "sum"
674
    )
675
    df_census_share = df_census_share.rename("cell_share")
676
677
    df_census_share = pd.concat(
678
        [
679
            df_census_share,
680
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
681
        ],
682
        axis=1,
683
    )
684
685
    df_census_share.reset_index(inplace=True)
686
    return df_census_share
687
688
689
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
690
    """
691
    Share of cts electricity demand profile per bus for every selected building
692
    is calculated. Building-amenity share is multiplied with census cell share
693
    to get the substation bus profile share for each building. The share is
694
    grouped and aggregated per building as some cover multiple cells.
695
696
    Parameters
697
    ----------
698
    df_cts_buildings: gpd.GeoDataFrame
699
        Table of all buildings with cts demand assigned
700
    scenario: str
701
        Scenario for which the share is calculated.
702
703
    Returns
704
    -------
705
    df_building_share: pd.DataFrame
706
        Table of bus profile share per building
707
708
    """
709
710
    def calc_building_amenity_share(df_cts_buildings):
711
        """
712
        Calculate the building share by the number amenities per building
713
        within a census cell.
714
        """
715
        df_building_amenity_share = df_cts_buildings[
716
            "n_amenities_inside"
717
        ] / df_cts_buildings.groupby("zensus_population_id")[
718
            "n_amenities_inside"
719
        ].transform(
720
            "sum"
721
        )
722
        df_building_amenity_share = pd.concat(
723
            [
724
                df_building_amenity_share.rename("building_amenity_share"),
725
                df_cts_buildings[["zensus_population_id", "id"]],
726
            ],
727
            axis=1,
728
        )
729
        return df_building_amenity_share
730
731
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
732
733
    df_census_cell_share = calc_census_cell_share(scenario)
734
735
    df_demand_share = pd.merge(
736
        left=df_building_amenity_share,
737
        right=df_census_cell_share,
738
        left_on="zensus_population_id",
739
        right_on="zensus_population_id",
740
    )
741
    df_demand_share["profile_share"] = df_demand_share[
742
        "building_amenity_share"
743
    ].multiply(df_demand_share["cell_share"])
744
745
    df_demand_share = df_demand_share[
746
        ["id", "bus_id", "scenario", "profile_share"]
747
    ]
748
    # Group and aggregate per building for multi cell buildings
749
    df_demand_share = (
750
        df_demand_share.groupby(["scenario", "bus_id", "id"])
751
        .sum()
752
        .reset_index()
753
    )
754
755
    return df_demand_share
756
757
758
def calc_building_profiles(
759
    df_demand_share=None,
760
    egon_building_id=None,
761
    bus_id=None,
762
    scenario="eGon2035",
763
):
764
    """
765
    Calculate the demand profile for each building. The profile is
766
    calculated by the demand share of the building per substation bus.
767
768
    Parameters
769
    ----------
770
    df_demand_share: pd.DataFrame
771
        Table of demand share per building. If not given, table is
772
        sourced from database.
773
    egon_building_id: int
774
        Id of the building for which the profile is calculated. If not
775
        given, the profiles are calculated for all buildings.
776
777
    Returns
778
    -------
779
    df_building_profiles: pd.DataFrame
780
        Table of demand profile per building
781
    """
782
783
    if not isinstance(df_demand_share, pd.DataFrame):
784
        with db.session_scope() as session:
785
            cells_query = session.query(EgonCtsElectricityDemandBuildingShare)
786
787
        df_demand_share = pd.read_sql(
788
            cells_query.statement, cells_query.session.bind, index_col=None
789
        )
790
791
    df_cts_profiles = calc_load_curves_cts(scenario)
792
793
    # get demand share of selected building id
794
    if isinstance(egon_building_id, int):
795
        if egon_building_id in df_demand_share["id"]:
796
            df_demand_share = df_demand_share.loc[
797
                df_demand_share["id"] == egon_building_id
798
            ]
799
        else:
800
            raise KeyError(f"Building with id {egon_building_id} not found")
801
802
    # get demand share of all buildings for selected bus id
803
    if isinstance(bus_id, int):
804
        if bus_id in df_demand_share["bus_id"]:
805
            df_demand_share = df_demand_share.loc[
806
                df_demand_share["bus_id"] == bus_id
807
            ]
808
        else:
809
            raise KeyError(f"Bus with id {bus_id} not found")
810
811
    # get demand profile for all buildings for selected demand share
812
    df_building_profiles = pd.DataFrame()
813
    for bus_id, df in df_demand_share.groupby("bus_id"):
814
        shares = df.set_index("id", drop=True)["profile_share"]
815
        profile = df_cts_profiles.loc[:, bus_id]
816
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 813 is not entered. Are you sure this can never be the case?
Loading history...
817
        df_building_profiles = pd.concat(
818
            [df_building_profiles, building_profiles], axis=1
819
        )
820
821
    return df_building_profiles
822
823
824
def cts_to_buildings():
825
    """
826
    Assigns CTS demand to buildings and calculates the respective demand
827
    profiles. The demand profile per substation are disaggregated per
828
    annual demand share of each census cell and by the number of amenities
829
    per building within the cell. If no building data is available,
830
    synthetic buildings are generated around the amenities. If no amenities
831
    but cts demand is available, buildings are randomly selected. If no
832
    building nor amenity is available, random synthetic buildings are
833
    generated. The demand share is stored in the database.
834
835
    Note:
836
    -----
837
    Cells with CTS demand, amenities and buildings do not change within
838
    the scenarios, only the demand itself. Therefore scenario eGon2035
839
    can be used universally to determine the cts buildings but not for
840
    he demand share.
841
    """
842
843
    # Buildings with amenities
844
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
845
846
    # Remove synthetic CTS buildings if existing
847
    delete_synthetic_cts_buildings()
848
849
    # Create synthetic buildings for amenites without buildings
850
    df_amenities_without_buildings = amenities_without_buildings()
851
    if not df_lost_cells.empty():
852
        df_lost_cells = place_buildings_with_amenities(
853
            df_lost_cells, amenities=1
854
        )
855
        df_lost_cells.rename(
856
            columns={
857
                "id": "zensus_population_id",
858
                "geom_point": "geom_amenity",
859
            },
860
            inplace=True,
861
        )
862
        df_lost_cells.drop(columns="building_count", inplace=True)
863
        df_amenities_without_buildings = df_amenities_without_buildings.append(
864
            df_lost_cells, ignore_index=True
865
        )
866
867
    df_amenities_without_buildings["n_amenities_inside"] = 1
868
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
869
        df_amenities_without_buildings, points="geom_amenity"
870
    )
871
872
    # TODO write to DB and remove renaming
873
    # write_synthetic_buildings_to_db(df_synthetic_buildings_with_amenities)
874
    write_table_to_postgis(
875
        df_synthetic_buildings_with_amenities.rename(
876
            columns={
877
                "zensus_population_id": "cell_id",
878
                "egon_building_id": "id",
879
            }
880
        ),
881
        OsmBuildingsSynthetic,
882
        drop=False,
883
    )
884
885
    # Cells without amenities but CTS demand and buildings
886
    df_buildings_without_amenities = buildings_without_amenities()
887
888
    # TODO Fix Adhoc Bugfix duplicated buildings
889
    mask = df_buildings_without_amenities.loc[
890
        df_buildings_without_amenities["id"].isin(
891
            df_buildings_with_amenities["id"]
892
        )
893
    ].index
894
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
895
        index=mask
896
    ).reset_index(drop=True)
897
898
    df_buildings_without_amenities = select_cts_buildings(
899
        df_buildings_without_amenities
900
    )
901
    df_buildings_without_amenities["n_amenities_inside"] = 1
902
903
    # Create synthetic amenities and buildings in cells with only CTS demand
904
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
905
        df_buildings_without_amenities
906
    )
907
    # Only 1 Amenity per cell
908
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
909
    # Only 1 Amenity per Building
910
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
911
        df_cells_with_cts_demand_only, amenities=1
912
    )
913
    # Leads to only 1 building per cell
914
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
915
        df_cells_with_cts_demand_only, points="geom_point"
916
    )
917
918
    # TODO write to DB and remove renaming
919
    # write_synthetic_buildings_to_db(df_synthetic_buildings_without_amenities)
920
    write_table_to_postgis(
921
        df_synthetic_buildings_without_amenities.rename(
922
            columns={
923
                "zensus_population_id": "cell_id",
924
                "egon_building_id": "id",
925
            }
926
        ),
927
        OsmBuildingsSynthetic,
928
        drop=False,
929
    )
930
931
    # Concat all buildings
932
    columns = [
933
        "zensus_population_id",
934
        "id",
935
        "geom_building",
936
        "n_amenities_inside",
937
        "source",
938
    ]
939
940
    df_buildings_with_amenities["source"] = "bwa"
941
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
942
    df_buildings_without_amenities["source"] = "bwoa"
943
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
944
945
    df_cts_buildings = pd.concat(
946
        [
947
            df_buildings_with_amenities[columns],
948
            df_synthetic_buildings_with_amenities[columns],
949
            df_buildings_without_amenities[columns],
950
            df_synthetic_buildings_without_amenities[columns],
951
        ],
952
        axis=0,
953
        ignore_index=True,
954
    )
955
    # TODO maybe remove after #772
956
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
957
958
    # Write table to db for debugging
959
    # TODO remove later
960
    df_cts_buildings = gpd.GeoDataFrame(
961
        df_cts_buildings, geometry="geom_building", crs=3035
962
    )
963
    df_cts_buildings = df_cts_buildings.reset_index().rename(
964
        columns={"index": "serial"}
965
    )
966
    write_table_to_postgis(
967
        df_cts_buildings,
968
        CtsBuildings,
969
        drop=True,
970
    )
971
972
    df_demand_share_2035 = calc_building_demand_profile_share(
973
        df_cts_buildings, scenario="eGon2035"
974
    )
975
    df_demand_share_100RE = calc_building_demand_profile_share(
976
        df_cts_buildings, scenario="eGon100RE"
977
    )
978
979
    df_demand_share = pd.concat(
980
        [df_demand_share_2035, df_demand_share_100RE],
981
        axis=0,
982
        ignore_index=True,
983
    )
984
985
    write_table_to_postgres(
986
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
987
    )
988
989
    return df_cts_buildings, df_demand_share
990
991
992
def get_peak_load_cts_buildings():
993
    """
994
    Get peak load of all CTS buildings for both scenarios and store in DB.
995
    """
996
997
    # TODO Check units, maybe MwH?
998
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
999
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename(
1000
        "cts_peak_load_in_w_2035"
1001
    )
1002
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
1003
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename(
1004
        "cts_peak_load_in_w_100RE"
1005
    )
1006
    df_peak_load = pd.concat(
1007
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1008
    ).reset_index()
1009
1010
    CtsPeakLoads.__table__.drop(bind=engine, checkfirst=True)
1011
    CtsPeakLoads.__table__.create(bind=engine, checkfirst=True)
1012
1013
    # Write peak loads into db
1014
    with db.session_scope() as session:
1015
        session.bulk_insert_mappings(
1016
            CtsPeakLoads,
1017
            df_peak_load.to_dict(orient="records"),
1018
        )
1019
1020
1021
def delete_synthetic_cts_buildings():
1022
    """
1023
    All synthetic cts buildings are deleted from the DB. This is necessary if
1024
    the task is run multiple times as the existing synthetic buildings
1025
    influence the results.
1026
    """
1027
    # import db tables
1028
    from saio.openstreetmap import osm_buildings_synthetic
1029
1030
    # cells mit amenities
1031
    with db.session_scope() as session:
1032
        session.query(osm_buildings_synthetic).filter(
1033
            osm_buildings_synthetic.building == "cts"
1034
        ).delete()
1035
1036
1037
class CtsElectricityBuildings(Dataset):
1038
    def __init__(self, dependencies):
1039
        super().__init__(
1040
            name="CtsElectricityBuildings",
1041
            version="0.0.0.",
1042
            dependencies=dependencies,
1043
            tasks=(
1044
                cts_to_buildings,
1045
                get_peak_load_cts_buildings,
1046
                # get_all_cts_building_profiles,
1047
            ),
1048
        )
1049