Passed
Pull Request — dev (#826) — created by unknown at 01:48

data.datasets.electricity_demand_timeseries.cts_buildings   A

Complexity

Total Complexity 42

Size/Duplication

Total Lines 1087
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 42
eloc 552
dl 0
loc 1087
rs 9.0399
c 0
b 0
f 0

13 Functions

Rating   Name   Duplication   Size   Complexity  
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A amenities_without_buildings() 0 60 2
A delete_synthetic_cts_buildings() 0 14 2
A get_peak_load_cts_buildings() 0 35 3
C calc_building_profiles() 0 75 9
A select_cts_buildings() 0 25 1
B cells_with_cts_demand_only() 0 72 2
B cts_to_buildings() 0 188 2
A calc_building_demand_profile_share() 0 71 2
C buildings_with_amenities() 0 183 5
A calc_census_cell_share() 0 56 2
B buildings_without_amenities() 0 99 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 8 1

How to fix: Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring; a brief sketch follows below. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
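For illustration only, here is a minimal sketch of what Extract Class could look like in this module: the synthetic-building helpers (square footprints around points, shared edge length and CRS) form one cohesive concern that could live behind a single object. The class name SyntheticBuildingFactory and its interface are hypothetical and are not part of the reviewed code.

    # Hypothetical Extract Class sketch; names are illustrative, not from the module.
    import geopandas as gpd


    class SyntheticBuildingFactory:
        """Bundles the synthetic-building helpers behind one cohesive component."""

        def __init__(self, edge_length=5.0, crs="EPSG:3035"):
            self.edge_length = edge_length  # side length of the square footprint
            self.crs = crs

        def buildings_around_points(self, points):
            """Return one square building footprint per point as a GeoDataFrame."""
            # Square buffer (cap_style=3) around each point, mirroring what
            # create_synthetic_buildings() does inline today.
            footprints = points.buffer(distance=self.edge_length / 2, cap_style=3)
            return gpd.GeoDataFrame(
                {"geom_point": points, "geom_building": footprints},
                geometry="geom_building",
                crs=self.crs,
            )

Functions such as create_synthetic_buildings() and place_buildings_with_amenities() could then delegate to an object like this, which is the point of Extract Class: the extracted component has a single reason to change.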

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import calc_load_curves_cts
15
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
16
    BuildingPeakLoads,
17
    OsmBuildingsSynthetic,
18
)
19
from egon.data.datasets.electricity_demand_timeseries.tools import (
20
    random_ints_until_sum,
21
    random_point_in_square,
22
    specific_int_until_sum,
23
    write_table_to_postgis,
24
    write_table_to_postgres,
25
)
26
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
27
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
28
import egon.data.config
29
30
engine = db.engine()
31
Base = declarative_base()
32
33
# import db tables
34
saio.register_schema("openstreetmap", engine=engine)
35
saio.register_schema("society", engine=engine)
36
saio.register_schema("demand", engine=engine)
37
saio.register_schema("boundaries", engine=engine)
38
39
40
class EgonCtsElectricityDemandBuildingShare(Base):
41
    __tablename__ = "egon_cts_electricity_demand_building_share"
42
    __table_args__ = {"schema": "demand"}
43
44
    serial = Column(Integer, primary_key=True)
45
    id = Column(Integer, index=True)
46
    scenario = Column(String, index=True)
47
    # id = Column(Integer, primary_key=True)
48
    # scenario = Column(String, primary_key=True)
49
    bus_id = Column(Integer, index=True)
50
    profile_share = Column(Float)
51
52
53
class CtsBuildings(Base):
54
    __tablename__ = "egon_cts_buildings"
55
    __table_args__ = {"schema": "openstreetmap"}
56
57
    serial = Column(Integer, primary_key=True)
58
    id = Column(Integer, index=True)
59
    zensus_population_id = Column(Integer, index=True)
60
    geom_building = Column(Geometry("Polygon", 3035))
61
    n_amenities_inside = Column(Integer)
62
    source = Column(String)
63
64
65
def amenities_without_buildings():
66
    """
67
    Amenities which have no buildings assigned and are in
68
    a cell with cts demand are determined.
69
70
    Returns
71
    -------
72
    pd.DataFrame
73
        Table of amenities without buildings
74
    """
75
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
76
77
    with db.session_scope() as session:
78
        cells_query = (
79
            session.query(
80
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
81
                # TODO can be used for square around amenity
82
                #  (1 geom_amenity: 1 geom_building)
83
                #  not unique amenity_ids yet
84
                osm_amenities_not_in_buildings_filtered.geom_amenity,
85
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
86
                # EgonDemandRegioZensusElectricity.demand,
87
                # # TODO can be used to generate n random buildings
88
                # # (n amenities : 1 randombuilding)
89
                # func.count(
90
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
91
                # ).label("n_amenities_inside"),
92
                # DestatisZensusPopulationPerHa.geom,
93
            )
94
            .filter(
95
                func.st_within(
96
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
97
                    DestatisZensusPopulationPerHa.geom,
98
                )
99
            )
100
            .filter(
101
                DestatisZensusPopulationPerHa.id
102
                == EgonDemandRegioZensusElectricity.zensus_population_id
103
            )
104
            .filter(
105
                EgonDemandRegioZensusElectricity.sector == "service",
106
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
107
                #         ).group_by(
108
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
109
                #             DestatisZensusPopulationPerHa.geom,
110
            )
111
        )
112
    # # TODO can be used to generate n random buildings
113
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
114
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
115
    # )
116
    #
117
118
    # # TODO can be used for square around amenity
119
    df_amenities_without_buildings = gpd.read_postgis(
120
        cells_query.statement,
121
        cells_query.session.bind,
122
        geom_col="geom_amenity",
123
    )
124
    return df_amenities_without_buildings
125
126
127
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
128
    """
129
    Building centroids are placed randomly within census cells.
130
    The number of buildings is derived from n_amenities_inside, the selected
131
    method and the number of amenities per building.
132
133
    Returns
134
    -------
135
    df: gpd.GeoDataFrame
136
        Table of building centroids
137
    """
138
    if isinstance(max_amenities, int):
139
        # amount of amenities is randomly generated within bounds
140
        # (max_amenities, amenities per cell)
141
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
142
            random_ints_until_sum, args=[max_amenities]
143
        )
144
    if isinstance(amenities, int):
145
        # Specific amount of amenities per building
146
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
147
            specific_int_until_sum, args=[amenities]
148
        )
149
150
    # Unnest each building
151
    df = df.explode(column="n_amenities_inside")
152
153
    # building count per cell
154
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
155
156
    # generate random synthetic buildings
157
    edge_length = 5
158
    # create random points within census cells
159
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
160
161
    df.reset_index(drop=True, inplace=True)
162
    # Store center of polygon
163
    df["geom_point"] = points
164
    # Drop geometry of census cell
165
    df = df.drop(columns=["geom"])
166
167
    return df
168
169
170
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
171
    """
172
    Synthetic buildings are generated around points.
173
174
    Parameters
175
    ----------
176
    df: pd.DataFrame
177
        Table of census cells
178
    points: gpd.GeoSeries or str
179
        List of points to place buildings around or column name of df
180
    crs: str
181
        CRS of result table
182
183
    Returns
184
    -------
185
    df: gpd.GeoDataFrame
186
        Synthetic buildings
187
    """
188
189
    if isinstance(points, str) and points in df.columns:
190
        points = df[points]
191
    elif isinstance(points, gpd.GeoSeries):
192
        pass
193
    else:
194
        raise ValueError("Points are of the wrong type")
195
196
    # Create building using a square around point
197
    edge_length = 5
198
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
199
200
    if "geom_point" not in df.columns:
201
        df["geom_point"] = df["geom_building"].centroid
202
203
    # TODO Check CRS
204
    df = gpd.GeoDataFrame(
205
        df,
206
        crs=crs,
207
        geometry="geom_building",
208
    )
209
210
    # TODO remove after implementation of egon_building_id
211
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
212
213
    # get max number of building ids from synthetic residential table
214
    with db.session_scope() as session:
215
        max_synth_residential_id = session.execute(
216
            func.max(OsmBuildingsSynthetic.id)
217
        ).scalar()
218
    max_synth_residential_id = int(max_synth_residential_id)
219
220
    # create sequential ids
221
    df["egon_building_id"] = range(
222
        max_synth_residential_id + 1,
223
        max_synth_residential_id + df.shape[0] + 1,
224
    )
225
226
    df["area"] = df["geom_building"].area
227
    # set building type of synthetic building
228
    df["building"] = "cts"
229
    # TODO remove in #772
230
    df = df.rename(
231
        columns={
232
            # "zensus_population_id": "cell_id",
233
            "egon_building_id": "id",
234
        }
235
    )
236
    return df
237
238
239
def buildings_with_amenities():
240
    """
241
    Amenities which are assigned to buildings are determined
242
    and grouped per building and zensus cell. Buildings
243
    covering multiple cells therefore exist multiple times
244
    but in different zensus cells. This is necessary to cover
245
    all cells with a cts demand. If buildings exist in multiple
246
    substations, their amenities are summed and assigned and kept in
247
    one substation only. If, as a result, a census cell is uncovered,
248
    a synthetic amenity is placed. The buildings are aggregated
249
    afterwards during the calculation of the profile_share.
250
251
    Returns
252
    -------
253
    df_buildings_with_amenities: gpd.GeoDataFrame
254
        Contains all buildings with amenities per zensus cell.
255
    df_lost_cells: gpd.GeoDataFrame
256
        Contains synthetic amenities in lost cells. Might be empty
257
    """
258
259
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
260
261
    with db.session_scope() as session:
262
        cells_query = (
263
            session.query(
264
                osm_amenities_in_buildings_filtered,
265
                MapZensusGridDistricts.bus_id,
266
            )
267
            .filter(
268
                MapZensusGridDistricts.zensus_population_id
269
                == osm_amenities_in_buildings_filtered.zensus_population_id
270
            )
271
            .filter(
272
                EgonDemandRegioZensusElectricity.zensus_population_id
273
                == osm_amenities_in_buildings_filtered.zensus_population_id
274
            )
275
            .filter(
276
                EgonDemandRegioZensusElectricity.sector == "service",
277
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
278
            )
279
        )
280
    df_amenities_in_buildings = pd.read_sql(
281
        cells_query.statement, cells_query.session.bind, index_col=None
282
    )
283
284
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
285
        "geom_building"
286
    ].apply(to_shape)
287
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
288
        "geom_amenity"
289
    ].apply(to_shape)
290
291
    df_amenities_in_buildings["n_amenities_inside"] = 1
292
293
    # add identifier column for buildings in multiple substations
294
    df_amenities_in_buildings[
295
        "duplicate_identifier"
296
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
297
        "n_amenities_inside"
298
    ].transform(
299
        "cumsum"
300
    )
301
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
302
        ["id", "duplicate_identifier"]
303
    )
304
    # sum amenities of buildings with multiple substations
305
    df_amenities_in_buildings[
306
        "n_amenities_inside"
307
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
308
        "n_amenities_inside"
309
    ].transform(
310
        "sum"
311
    )
312
313
    # create column to always go for bus_id with max amenities
314
    df_amenities_in_buildings[
315
        "max_amenities"
316
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
317
        "n_amenities_inside"
318
    ].transform(
319
        "sum"
320
    )
321
    # sort to go for
322
    df_amenities_in_buildings.sort_values(
323
        ["id", "max_amenities"], ascending=False, inplace=True
324
    )
325
326
    # identify lost zensus cells
327
    df_lost_cells = df_amenities_in_buildings.loc[
328
        df_amenities_in_buildings.duplicated(
329
            subset=["id", "duplicate_identifier"], keep="first"
330
        )
331
    ]
332
    df_lost_cells.drop_duplicates(
333
        subset=["zensus_population_id"], inplace=True
334
    )
335
336
    # drop buildings with multiple substation and lower max amenity
337
    df_amenities_in_buildings.drop_duplicates(
338
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
339
    )
340
341
    # check if lost zensus cells are already covered
342
    if not df_lost_cells.empty:
343
        if not (
344
            df_amenities_in_buildings["zensus_population_id"]
345
            .isin(df_lost_cells["zensus_population_id"])
346
            .empty
347
        ):
348
            # query geom data for cell if not
349
            with db.session_scope() as session:
350
                cells_query = session.query(
351
                    DestatisZensusPopulationPerHa.id,
352
                    DestatisZensusPopulationPerHa.geom,
353
                ).filter(
354
                    DestatisZensusPopulationPerHa.id.in_(
355
                        df_lost_cells["zensus_population_id"]
356
                    )
357
                )
358
359
            df_lost_cells = gpd.read_postgis(
360
                cells_query.statement,
361
                cells_query.session.bind,
362
                geom_col="geom",
363
            )
364
            # TODO maybe adapt method
365
            # place random amenity in cell
366
            df_lost_cells["n_amenities_inside"] = 1
367
            df_lost_cells.rename(
368
                columns={
369
                    "id": "zensus_population_id",
370
                },
371
                inplace=True,
372
            )
373
            df_lost_cells = place_buildings_with_amenities(
374
                df_lost_cells, amenities=1
375
            )
376
            df_lost_cells.rename(
377
                columns={
378
                    # "id": "zensus_population_id",
379
                    "geom_point": "geom_amenity",
380
                },
381
                inplace=True,
382
            )
383
            df_lost_cells.drop(
384
                columns=["building_count", "n_amenities_inside"], inplace=True
385
            )
386
        else:
387
            df_lost_cells = None
388
    else:
389
        df_lost_cells = None
390
391
    # drop helper columns
392
    df_amenities_in_buildings.drop(
393
        columns=["duplicate_identifier"], inplace=True
394
    )
395
396
    # sum amenities per building and cell
397
    df_amenities_in_buildings[
398
        "n_amenities_inside"
399
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
400
        "n_amenities_inside"
401
    ].transform(
402
        "sum"
403
    )
404
    # drop duplicated buildings
405
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
406
        ["id", "zensus_population_id"]
407
    )
408
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
409
410
    df_buildings_with_amenities = df_buildings_with_amenities[
411
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
412
    ]
413
    df_buildings_with_amenities.rename(
414
        columns={
415
            # "zensus_population_id": "cell_id",
416
            "egon_building_id": "id"
417
        },
418
        inplace=True,
419
    )
420
421
    return df_buildings_with_amenities, df_lost_cells
422
423
424
def buildings_without_amenities():
425
    """
426
    Buildings (filtered and synthetic) in cells with
427
    cts demand but no amenities are determined.
428
429
    Returns
430
    -------
431
    df_buildings_without_amenities: gpd.GeoDataFrame
432
        Table of buildings without amenities in zensus cells
433
        with cts demand.
434
    """
435
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
436
    from saio.openstreetmap import (
437
        osm_amenities_shops_filtered,
438
        osm_buildings_filtered,
439
        osm_buildings_synthetic,
440
    )
441
442
    # buildings_filtered in cts-demand-cells without amenities
443
    with db.session_scope() as session:
444
445
        # Synthetic Buildings
446
        q_synth_buildings = session.query(
447
            osm_buildings_synthetic.cell_id.cast(Integer).label(
448
                "zensus_population_id"
449
            ),
450
            osm_buildings_synthetic.id.cast(Integer).label("id"),
451
            osm_buildings_synthetic.area.label("area"),
452
            osm_buildings_synthetic.geom_building.label("geom_building"),
453
            osm_buildings_synthetic.geom_point.label("geom_point"),
454
        )
455
456
        # Buildings filtered
457
        q_buildings_filtered = session.query(
458
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
459
            osm_buildings_filtered.id,
460
            osm_buildings_filtered.area,
461
            osm_buildings_filtered.geom_building,
462
            osm_buildings_filtered.geom_point,
463
        ).filter(
464
            osm_buildings_filtered.id
465
            == egon_map_zensus_buildings_filtered_all.id
466
        )
467
468
        # Amenities + zensus_population_id
469
        q_amenities = (
470
            session.query(
471
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
472
            )
473
            .filter(
474
                func.st_within(
475
                    osm_amenities_shops_filtered.geom_amenity,
476
                    DestatisZensusPopulationPerHa.geom,
477
                )
478
            )
479
            .distinct(DestatisZensusPopulationPerHa.id)
480
        )
481
482
        # Cells with CTS demand but without amenities
483
        q_cts_without_amenities = (
484
            session.query(
485
                EgonDemandRegioZensusElectricity.zensus_population_id,
486
            )
487
            .filter(
488
                EgonDemandRegioZensusElectricity.sector == "service",
489
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
490
            )
491
            .filter(
492
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
493
                    q_amenities
494
                )
495
            )
496
            .distinct()
497
        )
498
499
        # Buildings filtered + synthetic buildings residential in
500
        # cells with CTS demand but without amenities
501
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
502
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
503
                q_cts_without_amenities
504
            )
505
        )
506
507
    # df_buildings_without_amenities = pd.read_sql(
508
    #     cells_query.statement, cells_query.session.bind, index_col=None)
509
    df_buildings_without_amenities = gpd.read_postgis(
510
        cells_query.statement,
511
        cells_query.session.bind,
512
        geom_col="geom_building",
513
    )
514
515
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
516
        columns={
517
            # "zensus_population_id": "cell_id",
518
            "egon_building_id": "id",
519
        }
520
    )
521
522
    return df_buildings_without_amenities
523
524
525
def select_cts_buildings(df_buildings_wo_amenities, max_n):
526
    """
527
    N Buildings (filtered and synthetic) in each cell with
528
    cts demand are selected. Only the first n buildings
529
    are taken for each cell. The buildings are sorted by surface
530
    area.
531
532
    Returns
533
    -------
534
    df_buildings_with_cts_demand: gpd.GeoDataFrame
535
        Table of buildings
536
    """
537
538
    df_buildings_wo_amenities.sort_values(
539
        "area", ascending=False, inplace=True
540
    )
541
    # select first n ids each census cell if available
542
    df_buildings_with_cts_demand = (
543
        df_buildings_wo_amenities.groupby("zensus_population_id")
544
        .nth(list(range(max_n)))
545
        .reset_index()
546
    )
547
    df_buildings_with_cts_demand.reset_index(drop=True, inplace=True)
548
549
    return df_buildings_with_cts_demand
550
551
552
def cells_with_cts_demand_only(df_buildings_without_amenities):
553
    """
554
    Cells with cts demand but no amenities or buildings
555
    are determined.
556
557
    Returns
558
    -------
559
    df_cells_only_cts_demand: gpd.GeoDataFrame
560
        Table of cells with cts demand but no amenities or buildings
561
    """
562
    from saio.openstreetmap import osm_amenities_shops_filtered
563
564
    # cells with amenities
565
    with db.session_scope() as session:
566
        sub_query = (
567
            session.query(
568
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
569
            )
570
            .filter(
571
                func.st_within(
572
                    osm_amenities_shops_filtered.geom_amenity,
573
                    DestatisZensusPopulationPerHa.geom,
574
                )
575
            )
576
            .distinct(DestatisZensusPopulationPerHa.id)
577
        )
578
579
        cells_query = (
580
            session.query(
581
                EgonDemandRegioZensusElectricity.zensus_population_id,
582
                EgonDemandRegioZensusElectricity.scenario,
583
                EgonDemandRegioZensusElectricity.sector,
584
                EgonDemandRegioZensusElectricity.demand,
585
                DestatisZensusPopulationPerHa.geom,
586
            )
587
            .filter(
588
                EgonDemandRegioZensusElectricity.sector == "service",
589
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
590
            )
591
            .filter(
592
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
593
                    sub_query
594
                )
595
            )
596
            .filter(
597
                EgonDemandRegioZensusElectricity.zensus_population_id
598
                == DestatisZensusPopulationPerHa.id
599
            )
600
        )
601
602
    df_cts_cell_without_amenities = gpd.read_postgis(
603
        cells_query.statement,
604
        cells_query.session.bind,
605
        geom_col="geom",
606
        index_col=None,
607
    )
608
609
    # TODO maybe remove
610
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
611
        columns={"cell_id": "zensus_population_id"}
612
    )
613
614
    # Census cells with only cts demand
615
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
616
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
617
            df_buildings_without_amenities["zensus_population_id"].unique()
618
        )
619
    ]
620
621
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
622
623
    return df_cells_only_cts_demand
624
625
626
def calc_census_cell_share(scenario="eGon2035"):
627
    """
628
    The profile share for each census cell is calculated by its
629
    share of annual demand per substation bus. The annual demand
630
    per cell is defined by DemandRegio. The share is identical for both
631
    scenarios, as the annual demand is scaled linearly.
632
633
    Parameters
634
    ----------
635
    scenario: str
636
        Scenario for which the share is calculated.
637
638
    Returns
639
    -------
640
    df_census_share: pd.DataFrame
641
    """
642
643
    with db.session_scope() as session:
644
        cells_query = (
645
            session.query(
646
                EgonDemandRegioZensusElectricity, MapZensusGridDistricts.bus_id
647
            )
648
            .filter(EgonDemandRegioZensusElectricity.sector == "service")
649
            .filter(EgonDemandRegioZensusElectricity.scenario == scenario)
650
            .filter(
651
                EgonDemandRegioZensusElectricity.zensus_population_id
652
                == MapZensusGridDistricts.zensus_population_id
653
            )
654
        )
655
656
    df_demand_regio_electricity_demand = pd.read_sql(
657
        cells_query.statement,
658
        cells_query.session.bind,
659
        index_col="zensus_population_id",
660
    )
661
662
    # get demand share of cell per bus
663
    df_census_share = df_demand_regio_electricity_demand[
664
        "demand"
665
    ] / df_demand_regio_electricity_demand.groupby("bus_id")[
666
        "demand"
667
    ].transform(
668
        "sum"
669
    )
670
    df_census_share = df_census_share.rename("cell_share")
671
672
    df_census_share = pd.concat(
673
        [
674
            df_census_share,
675
            df_demand_regio_electricity_demand[["bus_id", "scenario"]],
676
        ],
677
        axis=1,
678
    )
679
680
    df_census_share.reset_index(inplace=True)
681
    return df_census_share
682
683
684
def calc_building_demand_profile_share(df_cts_buildings, scenario="eGon2035"):
685
    """
686
    Share of cts electricity demand profile per bus for every selected building
687
    is calculated. Building-amenity share is multiplied with census cell share
688
    to get the substation bus profile share for each building. The share is
689
    grouped and aggregated per building as some cover multiple cells.
690
691
    Parameters
692
    ----------
693
    df_cts_buildings: gpd.GeoDataFrame
694
        Table of all buildings with cts demand assigned
695
    scenario: str
696
        Scenario for which the share is calculated.
697
698
    Returns
699
    -------
700
    df_building_share: pd.DataFrame
701
        Table of bus profile share per building
702
703
    """
704
705
    def calc_building_amenity_share(df_cts_buildings):
706
        """
707
        Calculate the building share by the number of amenities per building
708
        within a census cell.
709
        """
710
        df_building_amenity_share = df_cts_buildings[
711
            "n_amenities_inside"
712
        ] / df_cts_buildings.groupby("zensus_population_id")[
713
            "n_amenities_inside"
714
        ].transform(
715
            "sum"
716
        )
717
        df_building_amenity_share = pd.concat(
718
            [
719
                df_building_amenity_share.rename("building_amenity_share"),
720
                df_cts_buildings[["zensus_population_id", "id"]],
721
            ],
722
            axis=1,
723
        )
724
        return df_building_amenity_share
725
726
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
727
728
    df_census_cell_share = calc_census_cell_share(scenario)
729
730
    df_demand_share = pd.merge(
731
        left=df_building_amenity_share,
732
        right=df_census_cell_share,
733
        left_on="zensus_population_id",
734
        right_on="zensus_population_id",
735
    )
736
    df_demand_share["profile_share"] = df_demand_share[
737
        "building_amenity_share"
738
    ].multiply(df_demand_share["cell_share"])
739
740
    df_demand_share = df_demand_share[
741
        ["id", "bus_id", "scenario", "profile_share"]
742
    ]
743
    # TODO adapt groupby?
744
    # Group and aggregate per building for multi cell buildings
745
    df_demand_share = (
746
        df_demand_share.groupby(["scenario", "id", "bus_id"])
747
        .sum()
748
        .reset_index()
749
    )
750
    if df_demand_share.duplicated("id", keep=False).any():
751
        print(
752
            df_demand_share.loc[df_demand_share.duplicated("id", keep=False)]
753
        )
754
    return df_demand_share
755
756
757
def calc_building_profiles(
758
    df_demand_share=None,
759
    egon_building_id=None,
760
    bus_id=None,
761
    scenario="eGon2035",
762
):
763
    """
764
    Calculate the demand profile for each building. The profile is
765
    calculated by the demand share of the building per substation bus.
766
767
    Parameters
768
    ----------
769
    df_demand_share: pd.DataFrame
770
        Table of demand share per building. If not given, table is
771
        sourced from database.
772
    egon_building_id: int
773
        Id of the building for which the profile is calculated. If not
774
        given, the profiles are calculated for all buildings.
775
776
    Returns
777
    -------
778
    df_building_profiles: pd.DataFrame
779
        Table of demand profile per building
780
    """
781
782
    if not isinstance(df_demand_share, pd.DataFrame):
783
        with db.session_scope() as session:
784
            cells_query = session.query(
785
                EgonCtsElectricityDemandBuildingShare,
786
            ).filter(
787
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
788
            )
789
790
        df_demand_share = pd.read_sql(
791
            cells_query.statement, cells_query.session.bind, index_col=None
792
        )
793
    else:
794
        df_demand_share = df_demand_share.loc[
795
            df_demand_share["scenario"] == scenario
796
        ]
797
798
    # TODO workaround
799
    df_demand_share = df_demand_share.drop(columns="serial")
800
801
    df_cts_profiles = calc_load_curves_cts(scenario)
802
803
    # get demand share of selected building id
804
    if isinstance(egon_building_id, int):
805
        if egon_building_id in df_demand_share["id"]:
806
            df_demand_share = df_demand_share.loc[
807
                df_demand_share["id"] == egon_building_id
808
            ]
809
        else:
810
            raise KeyError(f"Building with id {egon_building_id} not found")
811
812
    # get demand share of all buildings for selected bus id
813
    if isinstance(bus_id, int):
814
        if bus_id in df_demand_share["bus_id"]:
815
            df_demand_share = df_demand_share.loc[
816
                df_demand_share["bus_id"] == bus_id
817
            ]
818
        else:
819
            raise KeyError(f"Bus with id {bus_id} not found")
820
821
    # get demand profile for all buildings for selected demand share
822
    df_building_profiles = pd.DataFrame()
823
    for bus_id, df in df_demand_share.groupby("bus_id"):
824
        shares = df.set_index("id", drop=True)["profile_share"]
825
        profile = df_cts_profiles.loc[:, bus_id]
826
        building_profiles = profile.apply(lambda x: x * shares)
Issue introduced by this pull request: The variable shares does not seem to be defined in case the for loop on line 823 is not entered. Are you sure this can never be the case?
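A possible remedy, sketched here for illustration only (not part of the submitted file) and assuming that an empty share table should simply yield an empty result:

    # Hypothetical guard placed before the loop on line 823: return early when
    # there is nothing to group, so `shares` is never referenced unassigned.
    if df_demand_share.empty:
        return pd.DataFrame()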
827
        df_building_profiles = pd.concat(
828
            [df_building_profiles, building_profiles], axis=1
829
        )
830
831
    return df_building_profiles
832
833
834
def cts_to_buildings():
835
    """
836
    Assigns CTS demand to buildings and calculates the respective demand
837
    profiles. The demand profile per substation is disaggregated by the
838
    annual demand share of each census cell and by the number of amenities
839
    per building within the cell. If no building data is available,
840
    synthetic buildings are generated around the amenities. If no amenities
841
    but cts demand is available, buildings are randomly selected. If no
842
    building nor amenity is available, random synthetic buildings are
843
    generated. The demand share is stored in the database.
844
845
    Note:
846
    -----
847
    Cells with CTS demand, amenities and buildings do not change within
848
    the scenarios, only the demand itself. Therefore scenario eGon2035
849
    can be used universally to determine the cts buildings but not for
850
    the demand share.
851
    """
852
853
    # Buildings with amenities
854
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
855
856
    # Median number of amenities per cell
857
    median_n_amenities = int(
858
        df_buildings_with_amenities.groupby("zensus_population_id")[
859
            "n_amenities_inside"
860
        ]
861
        .sum()
862
        .median()
863
    )
864
865
    # Remove synthetic CTS buildings if existing
866
    delete_synthetic_cts_buildings()
867
868
    # Amenities not assigned to buildings
869
    df_amenities_without_buildings = amenities_without_buildings()
870
871
    # Append lost cells due to duplicated ids, to cover all demand cells
872
    if df_lost_cells.empty:
873
874
        df_lost_cells["amenities"] = median_n_amenities
875
        # create row for every amenity
876
        df_lost_cells["amenities"] = (
877
            df_lost_cells["amenities"].astype(int).apply(range)
878
        )
879
        df_lost_cells = df_lost_cells.explode("amenities")
880
        df_lost_cells.drop(columns="amenities", inplace=True)
881
        df_amenities_without_buildings = df_amenities_without_buildings.append(
882
            df_lost_cells, ignore_index=True
883
        )
884
    # One building per amenity
885
    df_amenities_without_buildings["n_amenities_inside"] = 1
886
    # Create synthetic buildings for amenities without buildings
887
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
888
        df_amenities_without_buildings, points="geom_amenity"
889
    )
890
891
    # TODO write to DB and remove renaming
892
    write_table_to_postgis(
893
        df_synthetic_buildings_with_amenities.rename(
894
            columns={
895
                "zensus_population_id": "cell_id",
896
                "egon_building_id": "id",
897
            }
898
        ),
899
        OsmBuildingsSynthetic,
900
        drop=False,
901
    )
902
903
    # Cells without amenities but CTS demand and buildings
904
    df_buildings_without_amenities = buildings_without_amenities()
905
906
    # TODO Fix Adhoc Bugfix duplicated buildings
907
    # drop building ids which have already been used
908
    mask = df_buildings_without_amenities.loc[
909
        df_buildings_without_amenities["id"].isin(
910
            df_buildings_with_amenities["id"]
911
        )
912
    ].index
913
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
914
        index=mask
915
    ).reset_index(drop=True)
916
917
    # select median n buildings per cell
918
    df_buildings_without_amenities = select_cts_buildings(
919
        df_buildings_without_amenities, max_n=median_n_amenities
920
    )
921
    df_buildings_without_amenities["n_amenities_inside"] = 1
922
923
    # Create synthetic amenities and buildings in cells with only CTS demand
924
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
925
        df_buildings_without_amenities
926
    )
927
    # Median n Amenities per cell
928
    df_cells_with_cts_demand_only["amenities"] = median_n_amenities
929
    # create row for every amenity
930
    df_cells_with_cts_demand_only["amenities"] = (
931
        df_cells_with_cts_demand_only["amenities"].astype(int).apply(range)
932
    )
933
    df_cells_with_cts_demand_only = df_cells_with_cts_demand_only.explode(
934
        "amenities"
935
    )
936
    df_cells_with_cts_demand_only.drop(columns="amenities", inplace=True)
937
938
    # Only 1 Amenity per Building
939
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
940
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
941
        df_cells_with_cts_demand_only, amenities=1
942
    )
943
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
944
        df_cells_with_cts_demand_only, points="geom_point"
945
    )
946
947
    # TODO write to DB and remove (backup) renaming
948
    write_table_to_postgis(
949
        df_synthetic_buildings_without_amenities.rename(
950
            columns={
951
                "zensus_population_id": "cell_id",
952
                "egon_building_id": "id",
953
            }
954
        ),
955
        OsmBuildingsSynthetic,
956
        drop=False,
957
    )
958
959
    # Concat all buildings
960
    columns = [
961
        "zensus_population_id",
962
        "id",
963
        "geom_building",
964
        "n_amenities_inside",
965
        "source",
966
    ]
967
968
    df_buildings_with_amenities["source"] = "bwa"
969
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
970
    df_buildings_without_amenities["source"] = "bwoa"
971
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
972
973
    df_cts_buildings = pd.concat(
974
        [
975
            df_buildings_with_amenities[columns],
976
            df_synthetic_buildings_with_amenities[columns],
977
            df_buildings_without_amenities[columns],
978
            df_synthetic_buildings_without_amenities[columns],
979
        ],
980
        axis=0,
981
        ignore_index=True,
982
    )
983
    # TODO maybe remove after #772
984
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
985
986
    # Write table to db for debugging
987
    # TODO remove later
988
    df_cts_buildings = gpd.GeoDataFrame(
989
        df_cts_buildings, geometry="geom_building", crs=3035
990
    )
991
    df_cts_buildings = df_cts_buildings.reset_index().rename(
992
        columns={"index": "serial"}
993
    )
994
    write_table_to_postgis(
995
        df_cts_buildings,
996
        CtsBuildings,
997
        drop=True,
998
    )
999
1000
    df_demand_share_2035 = calc_building_demand_profile_share(
1001
        df_cts_buildings, scenario="eGon2035"
1002
    )
1003
    df_demand_share_100RE = calc_building_demand_profile_share(
1004
        df_cts_buildings, scenario="eGon100RE"
1005
    )
1006
1007
    df_demand_share = pd.concat(
1008
        [df_demand_share_2035, df_demand_share_100RE],
1009
        axis=0,
1010
        ignore_index=True,
1011
    )
1012
    # TODO workaround
1013
    df_demand_share = df_demand_share.reset_index().rename(
1014
        columns={"index": "serial"}
1015
    )
1016
1017
    write_table_to_postgres(
1018
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
1019
    )
1020
1021
    return df_cts_buildings, df_demand_share
1022
1023
1024
def get_peak_load_cts_buildings():
1025
    """
1026
    Get peak load of all CTS buildings for both scenarios and store in DB.
1027
    """
1028
1029
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
1030
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename("eGon2035")
1031
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
1032
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename("eGon100RE")
1033
    df_peak_load = pd.concat(
1034
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1035
    ).reset_index()
1036
1037
    # TODO rename table column to egon_building_id
1038
    df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
1039
    df_peak_load["type"] = "cts"
1040
    df_peak_load = df_peak_load.melt(
1041
        id_vars=["building_id", "type"],
1042
        var_name="scenario",
1043
        value_name="peak_load_in_w",
1044
    )
1045
    # TODO Check units, maybe MWh?
1046
    # Convert unit to W
1047
    df_peak_load["peak_load_in_w"] = df_peak_load["peak_load_in_w"] * 1e6
1048
    # Delete existing cts peak load rows
1049
    with db.session_scope() as session:
1050
        session.query(BuildingPeakLoads).filter(
1051
            BuildingPeakLoads.type == "cts"
1052
        ).delete()
1053
1054
    # Write peak loads into db
1055
    with db.session_scope() as session:
1056
        session.bulk_insert_mappings(
1057
            BuildingPeakLoads,
1058
            df_peak_load.to_dict(orient="records"),
1059
        )
1060
1061
1062
def delete_synthetic_cts_buildings():
1063
    """
1064
    All synthetic cts buildings are deleted from the DB. This is necessary if
1065
    the task is run multiple times as the existing synthetic buildings
1066
    influence the results.
1067
    """
1068
    # import db tables
1069
    from saio.openstreetmap import osm_buildings_synthetic
1070
1071
    # delete synthetic cts buildings
1072
    with db.session_scope() as session:
1073
        session.query(osm_buildings_synthetic).filter(
1074
            osm_buildings_synthetic.building == "cts"
1075
        ).delete()
1076
1077
1078
class CtsElectricityBuildings(Dataset):
1079
    def __init__(self, dependencies):
1080
        super().__init__(
1081
            name="CtsElectricityBuildings",
1082
            version="0.0.0",
1083
            dependencies=dependencies,
1084
            tasks=(
1085
                cts_to_buildings,
1086
                get_peak_load_cts_buildings,
1087
                # get_all_cts_building_profiles,
1088
            ),
1089
        )
1090