Passed
Pull Request — dev (#809)
by
unknown
01:58 queued 14s
created

calc_building_profiles()   C

Complexity

Conditions 11

Size

Total Lines 99
Code Lines 41

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 41
dl 0
loc 99
rs 5.4
c 0
b 0
f 0
cc 11
nop 5

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings.calc_building_profiles() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import Column, Float, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import pandas as pd
7
import saio
8
9
from egon.data import db
10
from egon.data.datasets import Dataset
11
from egon.data.datasets.electricity_demand import (
12
    EgonDemandRegioZensusElectricity,
13
)
14
from egon.data.datasets.electricity_demand.temporal import (
15
    EgonEtragoElectricityCts,
16
    calc_load_curves_cts,
17
)
18
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
19
    BuildingPeakLoads,
20
    OsmBuildingsSynthetic,
21
)
22
from egon.data.datasets.electricity_demand_timeseries.tools import (
23
    random_ints_until_sum,
24
    random_point_in_square,
25
    specific_int_until_sum,
26
    write_table_to_postgis,
27
    write_table_to_postgres,
28
)
29
from egon.data.datasets.heat_demand import EgonPetaHeat
30
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
31
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
32
33
engine = db.engine()
34
Base = declarative_base()
35
36
# import db tables
37
saio.register_schema("openstreetmap", engine=engine)
38
saio.register_schema("society", engine=engine)
39
saio.register_schema("demand", engine=engine)
40
saio.register_schema("boundaries", engine=engine)
41
42
43 View Code Duplication
class EgonCtsElectricityDemandBuildingShare(Base):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
44
    __tablename__ = "egon_cts_electricity_demand_building_share"
45
    __table_args__ = {"schema": "demand"}
46
47
    serial = Column(Integer, primary_key=True)
48
    id = Column(Integer, index=True)
49
    scenario = Column(String, index=True)
50
    # id = Column(Integer, primary_key=True)
51
    # scenario = Column(String, primary_key=True)
52
    bus_id = Column(Integer, index=True)
53
    profile_share = Column(Float)
54
55
56 View Code Duplication
class EgonCtsHeatDemandBuildingShare(Base):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
57
    __tablename__ = "egon_cts_heat_demand_building_share"
58
    __table_args__ = {"schema": "demand"}
59
60
    serial = Column(Integer, primary_key=True)
61
    id = Column(Integer, index=True)
62
    scenario = Column(String, index=True)
63
    # id = Column(Integer, primary_key=True)
64
    # scenario = Column(String, primary_key=True)
65
    bus_id = Column(Integer, index=True)
66
    profile_share = Column(Float)
67
68
69
class CtsBuildings(Base):
70
    __tablename__ = "egon_cts_buildings"
71
    __table_args__ = {"schema": "openstreetmap"}
72
73
    serial = Column(Integer, primary_key=True)
74
    id = Column(Integer, index=True)
75
    zensus_population_id = Column(Integer, index=True)
76
    geom_building = Column(Geometry("Polygon", 3035))
77
    n_amenities_inside = Column(Integer)
78
    source = Column(String)
79
80
81
def amenities_without_buildings():
82
    """
83
    Amenities which have no buildings assigned and are in
84
    a cell with cts demand are determined.
85
86
    Returns
87
    -------
88
    pd.DataFrame
89
        Table of amenities without buildings
90
    """
91
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
92
93
    with db.session_scope() as session:
94
        cells_query = (
95
            session.query(
96
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
97
                # TODO can be used for square around amenity
98
                #  (1 geom_amenity: 1 geom_building)
99
                #  not unique amenity_ids yet
100
                osm_amenities_not_in_buildings_filtered.geom_amenity,
101
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
102
                # EgonDemandRegioZensusElectricity.demand,
103
                # # TODO can be used to generate n random buildings
104
                # # (n amenities : 1 randombuilding)
105
                # func.count(
106
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
107
                # ).label("n_amenities_inside"),
108
                # DestatisZensusPopulationPerHa.geom,
109
            )
110
            .filter(
111
                func.st_within(
112
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
113
                    DestatisZensusPopulationPerHa.geom,
114
                )
115
            )
116
            .filter(
117
                DestatisZensusPopulationPerHa.id
118
                == EgonDemandRegioZensusElectricity.zensus_population_id
119
            )
120
            .filter(
121
                EgonDemandRegioZensusElectricity.sector == "service",
122
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
123
                #         ).group_by(
124
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
125
                #             DestatisZensusPopulationPerHa.geom,
126
            )
127
        )
128
    # # TODO can be used to generate n random buildings
129
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
130
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
131
    # )
132
    #
133
134
    # # TODO can be used for square around amenity
135
    df_amenities_without_buildings = gpd.read_postgis(
136
        cells_query.statement,
137
        cells_query.session.bind,
138
        geom_col="geom_amenity",
139
    )
140
    return df_amenities_without_buildings
141
142
143
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
144
    """
145
    Building centroids are placed randomly within census cells.
146
    The Number of buildings is derived from n_amenity_inside, the selected
147
    method and number of amenities per building.
148
149
    Returns
150
    -------
151
    df: gpd.GeoDataFrame
152
        Table of buildings centroids
153
    """
154
    if isinstance(max_amenities, int):
155
        # amount of amenities is randomly generated within bounds
156
        # (max_amenities, amenities per cell)
157
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
158
            random_ints_until_sum, args=[max_amenities]
159
        )
160
    if isinstance(amenities, int):
161
        # Specific amount of amenities per building
162
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
163
            specific_int_until_sum, args=[amenities]
164
        )
165
166
    # Unnest each building
167
    df = df.explode(column="n_amenities_inside")
168
169
    # building count per cell
170
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
171
172
    # generate random synthetic buildings
173
    edge_length = 5
174
    # create random points within census cells
175
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
176
177
    df.reset_index(drop=True, inplace=True)
178
    # Store center of polygon
179
    df["geom_point"] = points
180
    # Drop geometry of census cell
181
    df = df.drop(columns=["geom"])
182
183
    return df
184
185
186
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
187
    """
188
    Synthetic buildings are generated around points.
189
190
    Parameters
191
    ----------
192
    df: pd.DataFrame
193
        Table of census cells
194
    points: gpd.GeoSeries or str
195
        List of points to place buildings around or column name of df
196
    crs: str
197
        CRS of result table
198
199
    Returns
200
    -------
201
    df: gpd.GeoDataFrame
202
        Synthetic buildings
203
    """
204
205
    if isinstance(points, str) and points in df.columns:
206
        points = df[points]
207
    elif isinstance(points, gpd.GeoSeries):
208
        pass
209
    else:
210
        raise ValueError("Points are of the wrong type")
211
212
    # Create building using a square around point
213
    edge_length = 5
214
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
215
216
    if "geom_point" not in df.columns:
217
        df["geom_point"] = df["geom_building"].centroid
218
219
    # TODO Check CRS
220
    df = gpd.GeoDataFrame(
221
        df,
222
        crs=crs,
223
        geometry="geom_building",
224
    )
225
226
    # TODO remove after implementation of egon_building_id
227
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
228
229
    # get max number of building ids from synthetic residential table
230
    with db.session_scope() as session:
231
        max_synth_residential_id = session.execute(
232
            func.max(OsmBuildingsSynthetic.id)
233
        ).scalar()
234
    max_synth_residential_id = int(max_synth_residential_id)
235
236
    # create sequential ids
237
    df["egon_building_id"] = range(
238
        max_synth_residential_id + 1,
239
        max_synth_residential_id + df.shape[0] + 1,
240
    )
241
242
    df["area"] = df["geom_building"].area
243
    # set building type of synthetic building
244
    df["building"] = "cts"
245
    # TODO remove in #772
246
    df = df.rename(
247
        columns={
248
            # "zensus_population_id": "cell_id",
249
            "egon_building_id": "id",
250
        }
251
    )
252
    return df
253
254
255
def buildings_with_amenities():
256
    """
257
    Amenities which are assigned to buildings are determined
258
    and grouped per building and zensus cell. Buildings
259
    covering multiple cells therefore exists multiple times
260
    but in different zensus cells. This is necessary to cover
261
    all cells with a cts demand. If buildings exist in multiple
262
    substations, their amenities are summed and assigned and kept in
263
    one substation only. If as a result, a census cell is uncovered,
264
    a synthetic amenity is placed. The buildings are aggregated
265
    afterwards during the calculation of the profile_share.
266
267
    Returns
268
    -------
269
    df_buildings_with_amenities: gpd.GeoDataFrame
270
        Contains all buildings with amenities per zensus cell.
271
    df_lost_cells: gpd.GeoDataFrame
272
        Contains synthetic amenities in lost cells. Might be empty
273
    """
274
275
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
276
277
    with db.session_scope() as session:
278
        cells_query = (
279
            session.query(
280
                osm_amenities_in_buildings_filtered,
281
                MapZensusGridDistricts.bus_id,
282
            )
283
            .filter(
284
                MapZensusGridDistricts.zensus_population_id
285
                == osm_amenities_in_buildings_filtered.zensus_population_id
286
            )
287
            .filter(
288
                EgonDemandRegioZensusElectricity.zensus_population_id
289
                == osm_amenities_in_buildings_filtered.zensus_population_id
290
            )
291
            .filter(
292
                EgonDemandRegioZensusElectricity.sector == "service",
293
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
294
            )
295
        )
296
    df_amenities_in_buildings = pd.read_sql(
297
        cells_query.statement, cells_query.session.bind, index_col=None
298
    )
299
300
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
301
        "geom_building"
302
    ].apply(to_shape)
303
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
304
        "geom_amenity"
305
    ].apply(to_shape)
306
307
    df_amenities_in_buildings["n_amenities_inside"] = 1
308
309
    # add identifier column for buildings in multiple substations
310
    df_amenities_in_buildings[
311
        "duplicate_identifier"
312
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
313
        "n_amenities_inside"
314
    ].transform(
315
        "cumsum"
316
    )
317
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
318
        ["id", "duplicate_identifier"]
319
    )
320
    # sum amenities of buildings with multiple substations
321
    df_amenities_in_buildings[
322
        "n_amenities_inside"
323
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
324
        "n_amenities_inside"
325
    ].transform(
326
        "sum"
327
    )
328
329
    # create column to always go for bus_id with max amenities
330
    df_amenities_in_buildings[
331
        "max_amenities"
332
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
333
        "n_amenities_inside"
334
    ].transform(
335
        "sum"
336
    )
337
    # sort to go for
338
    df_amenities_in_buildings.sort_values(
339
        ["id", "max_amenities"], ascending=False, inplace=True
340
    )
341
342
    # identify lost zensus cells
343
    df_lost_cells = df_amenities_in_buildings.loc[
344
        df_amenities_in_buildings.duplicated(
345
            subset=["id", "duplicate_identifier"], keep="first"
346
        )
347
    ]
348
    df_lost_cells.drop_duplicates(
349
        subset=["zensus_population_id"], inplace=True
350
    )
351
352
    # drop buildings with multiple substation and lower max amenity
353
    df_amenities_in_buildings.drop_duplicates(
354
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
355
    )
356
357
    # check if lost zensus cells are already covered
358
    if not df_lost_cells.empty:
359
        if not (
360
            df_amenities_in_buildings["zensus_population_id"]
361
            .isin(df_lost_cells["zensus_population_id"])
362
            .empty
363
        ):
364
            # query geom data for cell if not
365
            with db.session_scope() as session:
366
                cells_query = session.query(
367
                    DestatisZensusPopulationPerHa.id,
368
                    DestatisZensusPopulationPerHa.geom,
369
                ).filter(
370
                    DestatisZensusPopulationPerHa.id.in_(
371
                        df_lost_cells["zensus_population_id"]
372
                    )
373
                )
374
375
            df_lost_cells = gpd.read_postgis(
376
                cells_query.statement,
377
                cells_query.session.bind,
378
                geom_col="geom",
379
            )
380
            # TODO maybe adapt method
381
            # place random amenity in cell
382
            df_lost_cells["n_amenities_inside"] = 1
383
            df_lost_cells.rename(
384
                columns={
385
                    "id": "zensus_population_id",
386
                },
387
                inplace=True,
388
            )
389
            df_lost_cells = place_buildings_with_amenities(
390
                df_lost_cells, amenities=1
391
            )
392
            df_lost_cells.rename(
393
                columns={
394
                    # "id": "zensus_population_id",
395
                    "geom_point": "geom_amenity",
396
                },
397
                inplace=True,
398
            )
399
            df_lost_cells.drop(
400
                columns=["building_count", "n_amenities_inside"], inplace=True
401
            )
402
        else:
403
            df_lost_cells = None
404
    else:
405
        df_lost_cells = None
406
407
    # drop helper columns
408
    df_amenities_in_buildings.drop(
409
        columns=["duplicate_identifier"], inplace=True
410
    )
411
412
    # sum amenities per building and cell
413
    df_amenities_in_buildings[
414
        "n_amenities_inside"
415
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
416
        "n_amenities_inside"
417
    ].transform(
418
        "sum"
419
    )
420
    # drop duplicated buildings
421
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
422
        ["id", "zensus_population_id"]
423
    )
424
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
425
426
    df_buildings_with_amenities = df_buildings_with_amenities[
427
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
428
    ]
429
    df_buildings_with_amenities.rename(
430
        columns={
431
            # "zensus_population_id": "cell_id",
432
            "egon_building_id": "id"
433
        },
434
        inplace=True,
435
    )
436
437
    return df_buildings_with_amenities, df_lost_cells
438
439
440
def buildings_without_amenities():
441
    """
442
    Buildings (filtered and synthetic) in cells with
443
    cts demand but no amenities are determined.
444
445
    Returns
446
    -------
447
    df_buildings_without_amenities: gpd.GeoDataFrame
448
        Table of buildings without amenities in zensus cells
449
        with cts demand.
450
    """
451
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
452
    from saio.openstreetmap import (
453
        osm_amenities_shops_filtered,
454
        osm_buildings_filtered,
455
        osm_buildings_synthetic,
456
    )
457
458
    # buildings_filtered in cts-demand-cells without amenities
459
    with db.session_scope() as session:
460
461
        # Synthetic Buildings
462
        q_synth_buildings = session.query(
463
            osm_buildings_synthetic.cell_id.cast(Integer).label(
464
                "zensus_population_id"
465
            ),
466
            osm_buildings_synthetic.id.cast(Integer).label("id"),
467
            osm_buildings_synthetic.area.label("area"),
468
            osm_buildings_synthetic.geom_building.label("geom_building"),
469
            osm_buildings_synthetic.geom_point.label("geom_point"),
470
        )
471
472
        # Buildings filtered
473
        q_buildings_filtered = session.query(
474
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
475
            osm_buildings_filtered.id,
476
            osm_buildings_filtered.area,
477
            osm_buildings_filtered.geom_building,
478
            osm_buildings_filtered.geom_point,
479
        ).filter(
480
            osm_buildings_filtered.id
481
            == egon_map_zensus_buildings_filtered_all.id
482
        )
483
484
        # Amenities + zensus_population_id
485
        q_amenities = (
486
            session.query(
487
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
488
            )
489
            .filter(
490
                func.st_within(
491
                    osm_amenities_shops_filtered.geom_amenity,
492
                    DestatisZensusPopulationPerHa.geom,
493
                )
494
            )
495
            .distinct(DestatisZensusPopulationPerHa.id)
496
        )
497
498
        # Cells with CTS demand but without amenities
499
        q_cts_without_amenities = (
500
            session.query(
501
                EgonDemandRegioZensusElectricity.zensus_population_id,
502
            )
503
            .filter(
504
                EgonDemandRegioZensusElectricity.sector == "service",
505
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
506
            )
507
            .filter(
508
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
509
                    q_amenities
510
                )
511
            )
512
            .distinct()
513
        )
514
515
        # Buildings filtered + synthetic buildings residential in
516
        # cells with CTS demand but without amenities
517
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
518
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
519
                q_cts_without_amenities
520
            )
521
        )
522
523
    # df_buildings_without_amenities = pd.read_sql(
524
    #     cells_query.statement, cells_query.session.bind, index_col=None)
525
    df_buildings_without_amenities = gpd.read_postgis(
526
        cells_query.statement,
527
        cells_query.session.bind,
528
        geom_col="geom_building",
529
    )
530
531
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
532
        columns={
533
            # "zensus_population_id": "cell_id",
534
            "egon_building_id": "id",
535
        }
536
    )
537
538
    return df_buildings_without_amenities
539
540
541
def select_cts_buildings(df_buildings_wo_amenities, max_n):
542
    """
543
    N Buildings (filtered and synthetic) in each cell with
544
    cts demand are selected. Only the first n buildings
545
    are taken for each cell. The buildings are sorted by surface
546
    area.
547
548
    Returns
549
    -------
550
    df_buildings_with_cts_demand: gpd.GeoDataFrame
551
        Table of buildings
552
    """
553
554
    df_buildings_wo_amenities.sort_values(
555
        "area", ascending=False, inplace=True
556
    )
557
    # select first n ids each census cell if available
558
    df_buildings_with_cts_demand = (
559
        df_buildings_wo_amenities.groupby("zensus_population_id")
560
        .nth(list(range(max_n)))
561
        .reset_index()
562
    )
563
    df_buildings_with_cts_demand.reset_index(drop=True, inplace=True)
564
565
    return df_buildings_with_cts_demand
566
567
568
def cells_with_cts_demand_only(df_buildings_without_amenities):
569
    """
570
    Cells with cts demand but no amenities or buildilngs
571
    are determined.
572
573
    Returns
574
    -------
575
    df_cells_only_cts_demand: gpd.GeoDataFrame
576
        Table of cells with cts demand but no amenities or buildings
577
    """
578
    from saio.openstreetmap import osm_amenities_shops_filtered
579
580
    # cells mit amenities
581
    with db.session_scope() as session:
582
        sub_query = (
583
            session.query(
584
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
585
            )
586
            .filter(
587
                func.st_within(
588
                    osm_amenities_shops_filtered.geom_amenity,
589
                    DestatisZensusPopulationPerHa.geom,
590
                )
591
            )
592
            .distinct(DestatisZensusPopulationPerHa.id)
593
        )
594
595
        cells_query = (
596
            session.query(
597
                EgonDemandRegioZensusElectricity.zensus_population_id,
598
                EgonDemandRegioZensusElectricity.scenario,
599
                EgonDemandRegioZensusElectricity.sector,
600
                EgonDemandRegioZensusElectricity.demand,
601
                DestatisZensusPopulationPerHa.geom,
602
            )
603
            .filter(
604
                EgonDemandRegioZensusElectricity.sector == "service",
605
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
606
            )
607
            .filter(
608
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
609
                    sub_query
610
                )
611
            )
612
            .filter(
613
                EgonDemandRegioZensusElectricity.zensus_population_id
614
                == DestatisZensusPopulationPerHa.id
615
            )
616
        )
617
618
    df_cts_cell_without_amenities = gpd.read_postgis(
619
        cells_query.statement,
620
        cells_query.session.bind,
621
        geom_col="geom",
622
        index_col=None,
623
    )
624
625
    # TODO maybe remove
626
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
627
        columns={"cell_id": "zensus_population_id"}
628
    )
629
630
    # Census cells with only cts demand
631
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
632
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
633
            df_buildings_without_amenities["zensus_population_id"].unique()
634
        )
635
    ]
636
637
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
638
639
    return df_cells_only_cts_demand
640
641
642
def calc_census_cell_share(scenario="eGon2035", sector="electricity"):
643
    """
644
    The profile share for each census cell is calculated by it's
645
    share of annual demand per substation bus. The annual demand
646
    per cell is defined by DemandRegio/Peta5. The share is for both
647
    scenarios identical as the annual demand is linearly scaled.
648
649
    Parameters
650
    ----------
651
    scenario: str
652
        Scenario for which the share is calculated.
653
    sector: str
654
        Scenario for which the share is calculated.
655
656
    Returns
657
    -------
658
    df_census_share: pd.DataFrame
659
    """
660
    if sector == "electricity":
661
        demand_table = EgonDemandRegioZensusElectricity
662
    elif sector == "heat":
663
        demand_table = EgonPetaHeat
664
665
    with db.session_scope() as session:
666
        cells_query = (
667
            session.query(demand_table, MapZensusGridDistricts.bus_id)
0 ignored issues
show
introduced by
The variable demand_table does not seem to be defined for all execution paths.
Loading history...
668
            .filter(demand_table.sector == "service")
669
            .filter(demand_table.scenario == scenario)
670
            .filter(
671
                demand_table.zensus_population_id
672
                == MapZensusGridDistricts.zensus_population_id
673
            )
674
        )
675
676
    df_demand = pd.read_sql(
677
        cells_query.statement,
678
        cells_query.session.bind,
679
        index_col="zensus_population_id",
680
    )
681
682
    # get demand share of cell per bus
683
    df_census_share = df_demand["demand"] / df_demand.groupby("bus_id")[
684
        "demand"
685
    ].transform("sum")
686
    df_census_share = df_census_share.rename("cell_share")
687
688
    df_census_share = pd.concat(
689
        [
690
            df_census_share,
691
            df_demand[["bus_id", "scenario"]],
692
        ],
693
        axis=1,
694
    )
695
696
    df_census_share.reset_index(inplace=True)
697
    return df_census_share
698
699
700
def calc_building_demand_profile_share(
701
    df_cts_buildings, scenario="eGon2035", sector="electricity"
702
):
703
    """
704
    Share of cts electricity demand profile per bus for every selected building
705
    is calculated. Building-amenity share is multiplied with census cell share
706
    to get the substation bus profile share for each building. The share is
707
    grouped and aggregated per building as some cover multiple cells.
708
709
    Parameters
710
    ----------
711
    df_cts_buildings: gpd.GeoDataFrame
712
        Table of all buildings with cts demand assigned
713
    scenario: str
714
        Scenario for which the share is calculated.
715
    sector: str
716
        Sector for which the share is calculated.
717
718
    Returns
719
    -------
720
    df_building_share: pd.DataFrame
721
        Table of bus profile share per building
722
723
    """
724
725
    def calc_building_amenity_share(df_cts_buildings):
726
        """
727
        Calculate the building share by the number amenities per building
728
        within a census cell.
729
        """
730
        df_building_amenity_share = df_cts_buildings[
731
            "n_amenities_inside"
732
        ] / df_cts_buildings.groupby("zensus_population_id")[
733
            "n_amenities_inside"
734
        ].transform(
735
            "sum"
736
        )
737
        df_building_amenity_share = pd.concat(
738
            [
739
                df_building_amenity_share.rename("building_amenity_share"),
740
                df_cts_buildings[["zensus_population_id", "id"]],
741
            ],
742
            axis=1,
743
        )
744
        return df_building_amenity_share
745
746
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
747
748
    df_census_cell_share = calc_census_cell_share(
749
        scenario=scenario, sector=sector
750
    )
751
752
    df_demand_share = pd.merge(
753
        left=df_building_amenity_share,
754
        right=df_census_cell_share,
755
        left_on="zensus_population_id",
756
        right_on="zensus_population_id",
757
    )
758
    df_demand_share["profile_share"] = df_demand_share[
759
        "building_amenity_share"
760
    ].multiply(df_demand_share["cell_share"])
761
762
    df_demand_share = df_demand_share[
763
        ["id", "bus_id", "scenario", "profile_share"]
764
    ]
765
    # TODO adapt groupby?
766
    # Group and aggregate per building for multi cell buildings
767
    df_demand_share = (
768
        df_demand_share.groupby(["scenario", "id", "bus_id"])
769
        .sum()
770
        .reset_index()
771
    )
772
    if df_demand_share.duplicated("id", keep=False).any():
773
        print(
774
            df_demand_share.loc[df_demand_share.duplicated("id", keep=False)]
775
        )
776
    return df_demand_share
777
778
779
def calc_building_profiles(
780
    df_demand_share=None,
781
    egon_building_id=None,
782
    bus_id=None,
783
    scenario="eGon2035",
784
    sector="electricity",
785
):
786
    """
787
    Calculate the demand profile for each building. The profile is
788
    calculated by the demand share of the building per substation bus.
789
790
    Parameters
791
    ----------
792
    df_demand_share: pd.DataFrame
793
        Table of demand share per building.
794
    egon_building_id: int
795
        Id of the building for which the profile is calculated. If not
796
        given, the profiles are calculated for all buildings.
797
    scenario: str
798
        Scenario for which the share is calculated.
799
    sector: str
800
        Sector for which the share is calculated.
801
802
    Returns
803
    -------
804
    df_building_profiles: pd.DataFrame
805
        Table of demand profile per building
806
    """
807
    if sector == "electricity":
808
        with db.session_scope() as session:
809
            cells_query = session.query(
810
                EgonCtsElectricityDemandBuildingShare,
811
            ).filter(
812
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
813
            )
814
815
        df_demand_share = pd.read_sql(
816
            cells_query.statement, cells_query.session.bind, index_col=None
817
        )
818
    elif sector == "heat":
819
        with db.session_scope() as session:
820
            cells_query = session.query(
821
                EgonCtsHeatDemandBuildingShare,
822
            ).filter(EgonCtsHeatDemandBuildingShare.scenario == scenario)
823
824
        df_demand_share = pd.read_sql(
825
            cells_query.statement, cells_query.session.bind, index_col=None
826
        )
827
828
    # TODO workaround
829
    df_demand_share = df_demand_share.drop(columns="serial")
830
831
    # TODO maybe use demand.egon_etrago_electricity_cts
832
    # with db.session_scope() as session:
833
    #     cells_query = (
834
    #         session.query(
835
    #             EgonEtragoElectricityCts
836
    #         ).filter(
837
    #             EgonEtragoElectricityCts.scn_name == scenario)
838
    #     )
839
    #
840
    # df_cts_profiles = pd.read_sql(
841
    #     cells_query.statement,
842
    #     cells_query.session.bind,
843
    # )
844
    # df_cts_profiles = pd.DataFrame.from_dict(
845
    #   df_cts_profiles.set_index('bus_id')['p_set'].to_dict(),
846
    #   orient="index")
847
    df_cts_profiles = calc_load_curves_cts(scenario)
848
849
    # get demand share of selected building id
850
    if isinstance(egon_building_id, int):
851
        if egon_building_id in df_demand_share["id"]:
852
            df_demand_share = df_demand_share.loc[
853
                df_demand_share["id"] == egon_building_id
854
            ]
855
        else:
856
            raise KeyError(f"Building with id {egon_building_id} not found")
857
858
    # get demand share of all buildings for selected bus id
859
    if isinstance(bus_id, int):
860
        if bus_id in df_demand_share["bus_id"]:
861
            df_demand_share = df_demand_share.loc[
862
                df_demand_share["bus_id"] == bus_id
863
            ]
864
        else:
865
            raise KeyError(f"Bus with id {bus_id} not found")
866
867
    # get demand profile for all buildings for selected demand share
868
    df_building_profiles = pd.DataFrame()
869
    for bus_id, df in df_demand_share.groupby("bus_id"):
870
        shares = df.set_index("id", drop=True)["profile_share"]
871
        profile = df_cts_profiles.loc[:, bus_id]
872
        building_profiles = profile.apply(lambda x: x * shares)
0 ignored issues
show
introduced by
The variable shares does not seem to be defined in case the for loop on line 869 is not entered. Are you sure this can never be the case?
Loading history...
873
        df_building_profiles = pd.concat(
874
            [df_building_profiles, building_profiles], axis=1
875
        )
876
877
    return df_building_profiles
878
879
880
def delete_synthetic_cts_buildings():
881
    """
882
    All synthetic cts buildings are deleted from the DB. This is necessary if
883
    the task is run multiple times as the existing synthetic buildings
884
    influence the results.
885
    """
886
    # import db tables
887
    from saio.openstreetmap import osm_buildings_synthetic
888
889
    # cells mit amenities
890
    with db.session_scope() as session:
891
        session.query(osm_buildings_synthetic).filter(
892
            osm_buildings_synthetic.building == "cts"
893
        ).delete()
894
895
896
def cts_buildings():
897
    """
898
    Assigns CTS demand to buildings and calculates the respective demand
899
    profiles. The demand profile per substation are disaggregated per
900
    annual demand share of each census cell and by the number of amenities
901
    per building within the cell. If no building data is available,
902
    synthetic buildings are generated around the amenities. If no amenities
903
    but cts demand is available, buildings are randomly selected. If no
904
    building nor amenity is available, random synthetic buildings are
905
    generated. The demand share is stored in the database.
906
907
    Note:
908
    -----
909
    Cells with CTS demand, amenities and buildings do not change within
910
    the scenarios, only the demand itself. Therefore scenario eGon2035
911
    can be used universally to determine the cts buildings but not for
912
    he demand share.
913
    """
914
915
    # Buildings with amenities
916
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
917
918
    # Median number of amenities per cell
919
    median_n_amenities = int(
920
        df_buildings_with_amenities.groupby("zensus_population_id")[
921
            "n_amenities_inside"
922
        ]
923
        .sum()
924
        .median()
925
    )
926
927
    # Remove synthetic CTS buildings if existing
928
    delete_synthetic_cts_buildings()
929
930
    # Amenities not assigned to buildings
931
    df_amenities_without_buildings = amenities_without_buildings()
932
933
    # Append lost cells due to duplicated ids, to cover all demand cells
934
    if df_lost_cells.empty:
935
936
        df_lost_cells["amenities"] = median_n_amenities
937
        # create row for every amenity
938
        df_lost_cells["amenities"] = (
939
            df_lost_cells["amenities"].astype(int).apply(range)
940
        )
941
        df_lost_cells = df_lost_cells.explode("amenities")
942
        df_lost_cells.drop(columns="amenities", inplace=True)
943
        df_amenities_without_buildings = df_amenities_without_buildings.append(
944
            df_lost_cells, ignore_index=True
945
        )
946
    # One building per amenity
947
    df_amenities_without_buildings["n_amenities_inside"] = 1
948
    # Create synthetic buildings for amenites without buildings
949
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
950
        df_amenities_without_buildings, points="geom_amenity"
951
    )
952
953
    # TODO write to DB and remove renaming
954
    write_table_to_postgis(
955
        df_synthetic_buildings_with_amenities.rename(
956
            columns={
957
                "zensus_population_id": "cell_id",
958
                "egon_building_id": "id",
959
            }
960
        ),
961
        OsmBuildingsSynthetic,
962
        drop=False,
963
    )
964
965
    # Cells without amenities but CTS demand and buildings
966
    df_buildings_without_amenities = buildings_without_amenities()
967
968
    # TODO Fix Adhoc Bugfix duplicated buildings
969
    # drop building ids which have already been used
970
    mask = df_buildings_without_amenities.loc[
971
        df_buildings_without_amenities["id"].isin(
972
            df_buildings_with_amenities["id"]
973
        )
974
    ].index
975
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
976
        index=mask
977
    ).reset_index(drop=True)
978
979
    # select median n buildings per cell
980
    df_buildings_without_amenities = select_cts_buildings(
981
        df_buildings_without_amenities, max_n=median_n_amenities
982
    )
983
    df_buildings_without_amenities["n_amenities_inside"] = 1
984
985
    # Create synthetic amenities and buildings in cells with only CTS demand
986
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
987
        df_buildings_without_amenities
988
    )
989
    # Median n Amenities per cell
990
    df_cells_with_cts_demand_only["amenities"] = median_n_amenities
991
    # create row for every amenity
992
    df_cells_with_cts_demand_only["amenities"] = (
993
        df_cells_with_cts_demand_only["amenities"].astype(int).apply(range)
994
    )
995
    df_cells_with_cts_demand_only = df_cells_with_cts_demand_only.explode(
996
        "amenities"
997
    )
998
    df_cells_with_cts_demand_only.drop(columns="amenities", inplace=True)
999
1000
    # Only 1 Amenity per Building
1001
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
1002
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
1003
        df_cells_with_cts_demand_only, amenities=1
1004
    )
1005
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
1006
        df_cells_with_cts_demand_only, points="geom_point"
1007
    )
1008
1009
    # TODO write to DB and remove (backup) renaming
1010
    write_table_to_postgis(
1011
        df_synthetic_buildings_without_amenities.rename(
1012
            columns={
1013
                "zensus_population_id": "cell_id",
1014
                "egon_building_id": "id",
1015
            }
1016
        ),
1017
        OsmBuildingsSynthetic,
1018
        drop=False,
1019
    )
1020
1021
    # Concat all buildings
1022
    columns = [
1023
        "zensus_population_id",
1024
        "id",
1025
        "geom_building",
1026
        "n_amenities_inside",
1027
        "source",
1028
    ]
1029
1030
    df_buildings_with_amenities["source"] = "bwa"
1031
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
1032
    df_buildings_without_amenities["source"] = "bwoa"
1033
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
1034
1035
    df_cts_buildings = pd.concat(
1036
        [
1037
            df_buildings_with_amenities[columns],
1038
            df_synthetic_buildings_with_amenities[columns],
1039
            df_buildings_without_amenities[columns],
1040
            df_synthetic_buildings_without_amenities[columns],
1041
        ],
1042
        axis=0,
1043
        ignore_index=True,
1044
    )
1045
    # TODO maybe remove after #772
1046
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
1047
1048
    # Write table to db for debugging
1049
    # TODO remove later
1050
    df_cts_buildings = gpd.GeoDataFrame(
1051
        df_cts_buildings, geometry="geom_building", crs=3035
1052
    )
1053
    df_cts_buildings = df_cts_buildings.reset_index().rename(
1054
        columns={"index": "serial"}
1055
    )
1056
    write_table_to_postgis(
1057
        df_cts_buildings,
1058
        CtsBuildings,
1059
        drop=True,
1060
    )
1061
1062
1063 View Code Duplication
def cts_electricity():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1064
    """
1065
    Calculate cts electricity demand share of hvmv substation profile
1066
     for buildings.
1067
    """
1068
1069
    with db.session_scope() as session:
1070
        cells_query = session.query(CtsBuildings)
1071
1072
    df_cts_buildings = pd.read_sql(
1073
        cells_query.statement, cells_query.session.bind, index_col=None
1074
    )
1075
1076
    df_demand_share_2035 = calc_building_demand_profile_share(
1077
        df_cts_buildings, scenario="eGon2035", sector="electricity"
1078
    )
1079
    df_demand_share_100RE = calc_building_demand_profile_share(
1080
        df_cts_buildings, scenario="eGon100RE", sector="electricity"
1081
    )
1082
1083
    df_demand_share = pd.concat(
1084
        [df_demand_share_2035, df_demand_share_100RE],
1085
        axis=0,
1086
        ignore_index=True,
1087
    )
1088
    # TODO workaround
1089
    df_demand_share = df_demand_share.reset_index().rename(
1090
        columns={"index": "serial"}
1091
    )
1092
1093
    write_table_to_postgres(
1094
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
1095
    )
1096
1097
1098 View Code Duplication
def cts_heat():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1099
    """
1100
    Calculate cts electricity demand share of hvmv substation profile
1101
     for buildings.
1102
    """
1103
1104
    with db.session_scope() as session:
1105
        cells_query = session.query(CtsBuildings)
1106
1107
    df_cts_buildings = pd.read_sql(
1108
        cells_query.statement, cells_query.session.bind, index_col=None
1109
    )
1110
1111
    df_demand_share_2035 = calc_building_demand_profile_share(
1112
        df_cts_buildings, scenario="eGon2035", sector="heat"
1113
    )
1114
    df_demand_share_100RE = calc_building_demand_profile_share(
1115
        df_cts_buildings, scenario="eGon100RE", sector="heat"
1116
    )
1117
1118
    df_demand_share = pd.concat(
1119
        [df_demand_share_2035, df_demand_share_100RE],
1120
        axis=0,
1121
        ignore_index=True,
1122
    )
1123
    # TODO workaround
1124
    df_demand_share = df_demand_share.reset_index().rename(
1125
        columns={"index": "serial"}
1126
    )
1127
1128
    write_table_to_postgres(
1129
        df_demand_share, EgonCtsHeatDemandBuildingShare, drop=True
1130
    )
1131
1132
1133
def get_cts_electricity_peak_load():
1134
    """
1135
    Get peak load of all CTS buildings for both scenarios and store in DB.
1136
    """
1137
1138
    df_building_profiles = calc_building_profiles(scenario="eGon2035")
1139
    df_peak_load_2035 = df_building_profiles.max(axis=0).rename("eGon2035")
1140
    df_building_profiles = calc_building_profiles(scenario="eGon100RE")
1141
    df_peak_load_100RE = df_building_profiles.max(axis=0).rename("eGon100RE")
1142
    df_peak_load = pd.concat(
1143
        [df_peak_load_2035, df_peak_load_100RE], axis=1
1144
    ).reset_index()
1145
1146
    # TODO rename table column to egon_building_id
1147
    df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
1148
    df_peak_load["type"] = "cts"
1149
    df_peak_load = df_peak_load.melt(
1150
        id_vars=["building_id", "type"],
1151
        var_name="scenario",
1152
        value_name="peak_load_in_w",
1153
    )
1154
    # TODO Check units, maybe MwH?
1155
    # Convert unit to W
1156
    df_peak_load["peak_load_in_w"] = df_peak_load["peak_load_in_w"] * 1e6
1157
    # Delete rows with cts demand
1158
    with db.session_scope() as session:
1159
        session.query(BuildingPeakLoads).filter(
1160
            BuildingPeakLoads.type == "cts"
1161
        ).delete()
1162
1163
    # Write peak loads into db
1164
    with db.session_scope() as session:
1165
        session.bulk_insert_mappings(
1166
            BuildingPeakLoads,
1167
            df_peak_load.to_dict(orient="records"),
1168
        )
1169
1170
1171
class CtsElectricityBuildings(Dataset):
1172
    def __init__(self, dependencies):
1173
        super().__init__(
1174
            name="CtsElectricityBuildings",
1175
            version="0.0.0",
1176
            dependencies=dependencies,
1177
            tasks=(
1178
                cts_buildings,
1179
                {cts_electricity, cts_heat},
1180
                get_cts_electricity_peak_load,
1181
            ),
1182
        )
1183