Passed
Pull Request — dev (#826)
by unknown, created 02:40

data.datasets.electricity_demand_timeseries.cts_buildings (Rating: B)

Complexity

Total Complexity 51

Size/Duplication

Total Lines 1215
Duplicated Lines 5.35 %

Importance

Changes 0
Metric Value
wmc 51
eloc 626
dl 65
loc 1215
rs 7.894
c 0
b 0
f 0

16 Functions

Rating   Name   Duplication   Size   Complexity  
A cts_heat() 33 33 2
A place_buildings_with_amenities() 0 41 3
B create_synthetic_buildings() 0 67 6
A delete_synthetic_cts_buildings() 0 14 2
A get_cts_electricity_peak_load() 0 38 4
C calc_building_profiles() 0 106 10
A amenities_without_buildings() 0 60 2
A select_cts_buildings() 0 25 1
B cells_with_cts_demand_only() 0 72 2
A start_logging() 0 11 1
A calc_building_demand_profile_share() 0 77 2
C buildings_with_amenities() 0 183 5
A cts_electricity() 32 32 2
A calc_census_cell_share() 0 56 4
B cts_buildings() 0 184 2
B buildings_without_amenities() 0 99 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A CtsElectricityBuildings.__init__() 0 9 1

How to fix

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions depend on the case. In this module, the duplication flagged in cts_electricity() and cts_heat() comes from two near-identical function bodies that differ only in the sector and the target table, so it can be removed by extracting a shared helper (see the sketch below).
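A minimal sketch of such a helper, assuming the module-level names shown later in this report (the helper name _calc_and_export_demand_share is hypothetical):

def _calc_and_export_demand_share(sector, target_table):
    """Shared body of cts_electricity() and cts_heat() (illustrative only)."""
    log = start_logging()
    log.info("Start logging!")
    with db.session_scope() as session:
        cells_query = session.query(CtsBuildings)

    df_cts_buildings = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col=None
    )
    log.info("CTS buildings from DB imported!")

    # one share table per scenario, concatenated as in the original functions
    df_demand_share = pd.concat(
        [
            calc_building_demand_profile_share(
                df_cts_buildings, scenario=scenario, sector=sector
            )
            for scenario in ["eGon2035", "eGon100RE"]
        ],
        axis=0,
        ignore_index=True,
    )
    log.info("Profile shares for eGon2035 and eGon100RE calculated!")

    write_table_to_postgres(df_demand_share, target_table, drop=True)
    log.info("Profile share exported to DB!")


def cts_electricity():
    """Calculate the cts electricity demand share per building."""
    _calc_and_export_demand_share(
        "electricity", EgonCtsElectricityDemandBuildingShare
    )


def cts_heat():
    """Calculate the cts heat demand share per building."""
    _calc_and_export_demand_share("heat", EgonCtsHeatDemandBuildingShare)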

Complexity

 Tip: Before tackling complexity, make sure that you eliminate any duplication first. This can often reduce the size of classes significantly.

Complex classes like data.datasets.electricity_demand_timeseries.cts_buildings often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
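As an illustration only, the functions sharing the calc_ prefix in this module (calc_census_cell_share, calc_building_demand_profile_share, calc_building_profiles) form such a cohesive group. A minimal sketch of an extracted class, using the hypothetical name CtsDemandShareCalculator and simply delegating to the existing module-level functions, could look like this:

class CtsDemandShareCalculator:
    """Hypothetical Extract Class target bundling the calc_* helpers."""

    def __init__(self, scenario="eGon2035", sector="electricity"):
        self.scenario = scenario
        self.sector = sector

    def census_cell_share(self):
        # delegates to the existing module-level function
        return calc_census_cell_share(
            scenario=self.scenario, sector=self.sector
        )

    def building_demand_profile_share(self, df_cts_buildings):
        return calc_building_demand_profile_share(
            df_cts_buildings, scenario=self.scenario, sector=self.sector
        )

    def building_profiles(self, egon_building_id=None, bus_id=None):
        return calc_building_profiles(
            egon_building_id=egon_building_id,
            bus_id=bus_id,
            scenario=self.scenario,
            sector=self.sector,
        )

Whether plain delegation like this or a full move of the function bodies into the class is preferable depends on how the callers (cts_electricity(), cts_heat(), get_cts_electricity_peak_load()) are meant to evolve.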

1
from geoalchemy2 import Geometry
2
from geoalchemy2.shape import to_shape
3
from sqlalchemy import REAL, Column, Integer, String, func
4
from sqlalchemy.ext.declarative import declarative_base
5
import geopandas as gpd
6
import numpy as np
7
import pandas as pd
8
import saio
9
import logging
10
11
from egon.data import db
12
from egon.data.datasets import Dataset
13
from egon.data.datasets.electricity_demand import (
14
    EgonDemandRegioZensusElectricity,
15
)
16
from egon.data.datasets.electricity_demand.temporal import (
17
    EgonEtragoElectricityCts,
18
    calc_load_curves_cts,
19
)
20
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
21
    BuildingPeakLoads,
22
    OsmBuildingsSynthetic,
23
)
24
from egon.data.datasets.electricity_demand_timeseries.tools import (
25
    random_ints_until_sum,
26
    random_point_in_square,
27
    specific_int_until_sum,
28
    write_table_to_postgis,
29
    write_table_to_postgres,
30
)
31
from egon.data.datasets.heat_demand import EgonPetaHeat
32
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts
33
from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa
34
35
engine = db.engine()
36
Base = declarative_base()
37
38
# import db tables
39
saio.register_schema("openstreetmap", engine=engine)
40
saio.register_schema("boundaries", engine=engine)
41
42
43
class EgonCtsElectricityDemandBuildingShare(Base):
44
    __tablename__ = "egon_cts_electricity_demand_building_share"
45
    __table_args__ = {"schema": "demand"}
46
47
    id = Column(Integer, primary_key=True)
48
    scenario = Column(String, primary_key=True)
49
    bus_id = Column(Integer, index=True)
50
    profile_share = Column(REAL)
51
52
53
class EgonCtsHeatDemandBuildingShare(Base):
54
    __tablename__ = "egon_cts_heat_demand_building_share"
55
    __table_args__ = {"schema": "demand"}
56
57
    id = Column(Integer, primary_key=True)
58
    scenario = Column(String, primary_key=True)
59
    bus_id = Column(Integer, index=True)
60
    profile_share = Column(REAL)
61
62
63
class CtsBuildings(Base):
64
    __tablename__ = "egon_cts_buildings"
65
    __table_args__ = {"schema": "openstreetmap"}
66
67
    serial = Column(Integer, primary_key=True)
68
    id = Column(Integer, index=True)
69
    zensus_population_id = Column(Integer, index=True)
70
    geom_building = Column(Geometry("Polygon", 3035))
71
    n_amenities_inside = Column(Integer)
72
    source = Column(String)
73
74
75
def start_logging():
76
    """Start logging into console"""
77
    log = logging.getLogger()
78
    log.setLevel(logging.INFO)
79
    logformat = logging.Formatter(
80
        "%(asctime)s %(message)s", "%m/%d/%Y %H:%M:%S"
81
    )
82
    sh = logging.StreamHandler()
83
    sh.setFormatter(logformat)
84
    log.addHandler(sh)
85
    return log
86
87
88
def amenities_without_buildings():
89
    """
90
    Amenities which have no buildings assigned and are in
91
    a cell with cts demand are determined.
92
93
    Returns
94
    -------
95
    pd.DataFrame
96
        Table of amenities without buildings
97
    """
98
    from saio.openstreetmap import osm_amenities_not_in_buildings_filtered
99
100
    with db.session_scope() as session:
101
        cells_query = (
102
            session.query(
103
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
104
                # TODO can be used for square around amenity
105
                #  (1 geom_amenity: 1 geom_building)
106
                #  not unique amenity_ids yet
107
                osm_amenities_not_in_buildings_filtered.geom_amenity,
108
                osm_amenities_not_in_buildings_filtered.egon_amenity_id,
109
                # EgonDemandRegioZensusElectricity.demand,
110
                # # TODO can be used to generate n random buildings
111
                # # (n amenities : 1 randombuilding)
112
                # func.count(
113
                #     osm_amenities_not_in_buildings_filtered.egon_amenity_id
114
                # ).label("n_amenities_inside"),
115
                # DestatisZensusPopulationPerHa.geom,
116
            )
117
            .filter(
118
                func.st_within(
119
                    osm_amenities_not_in_buildings_filtered.geom_amenity,
120
                    DestatisZensusPopulationPerHa.geom,
121
                )
122
            )
123
            .filter(
124
                DestatisZensusPopulationPerHa.id
125
                == EgonDemandRegioZensusElectricity.zensus_population_id
126
            )
127
            .filter(
128
                EgonDemandRegioZensusElectricity.sector == "service",
129
                EgonDemandRegioZensusElectricity.scenario == "eGon2035"
130
                #         ).group_by(
131
                #             EgonDemandRegioZensusElectricity.zensus_population_id,
132
                #             DestatisZensusPopulationPerHa.geom,
133
            )
134
        )
135
    # # TODO can be used to generate n random buildings
136
    # df_cells_with_amenities_not_in_buildings = gpd.read_postgis(
137
    #     cells_query.statement, cells_query.session.bind, geom_col="geom"
138
    # )
139
    #
140
141
    # # TODO can be used for square around amenity
142
    df_amenities_without_buildings = gpd.read_postgis(
143
        cells_query.statement,
144
        cells_query.session.bind,
145
        geom_col="geom_amenity",
146
    )
147
    return df_amenities_without_buildings
148
149
150
def place_buildings_with_amenities(df, amenities=None, max_amenities=None):
151
    """
152
    Building centroids are placed randomly within census cells.
153
    The number of buildings is derived from n_amenities_inside, the selected
154
    method and number of amenities per building.
155
156
    Returns
157
    -------
158
    df: gpd.GeoDataFrame
159
        Table of buildings centroids
160
    """
161
    if isinstance(max_amenities, int):
162
        # amount of amenities is randomly generated within bounds
163
        # (max_amenities, amenities per cell)
164
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
165
            random_ints_until_sum, args=[max_amenities]
166
        )
167
    if isinstance(amenities, int):
168
        # Specific amount of amenities per building
169
        df["n_amenities_inside"] = df["n_amenities_inside"].apply(
170
            specific_int_until_sum, args=[amenities]
171
        )
172
173
    # Unnest each building
174
    df = df.explode(column="n_amenities_inside")
175
176
    # building count per cell
177
    df["building_count"] = df.groupby(["zensus_population_id"]).cumcount() + 1
178
179
    # generate random synthetic buildings
180
    edge_length = 5
181
    # create random points within census cells
182
    points = random_point_in_square(geom=df["geom"], tol=edge_length / 2)
183
184
    df.reset_index(drop=True, inplace=True)
185
    # Store center of polygon
186
    df["geom_point"] = points
187
    # Drop geometry of census cell
188
    df = df.drop(columns=["geom"])
189
190
    return df
191
192
193
def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):
194
    """
195
    Synthetic buildings are generated around points.
196
197
    Parameters
198
    ----------
199
    df: pd.DataFrame
200
        Table of census cells
201
    points: gpd.GeoSeries or str
202
        List of points to place buildings around or column name of df
203
    crs: str
204
        CRS of result table
205
206
    Returns
207
    -------
208
    df: gpd.GeoDataFrame
209
        Synthetic buildings
210
    """
211
212
    if isinstance(points, str) and points in df.columns:
213
        points = df[points]
214
    elif isinstance(points, gpd.GeoSeries):
215
        pass
216
    else:
217
        raise ValueError("Points are of the wrong type")
218
219
    # Create building using a square around point
220
    edge_length = 5
221
    df["geom_building"] = points.buffer(distance=edge_length / 2, cap_style=3)
222
223
    if "geom_point" not in df.columns:
224
        df["geom_point"] = df["geom_building"].centroid
225
226
    # TODO Check CRS
227
    df = gpd.GeoDataFrame(
228
        df,
229
        crs=crs,
230
        geometry="geom_building",
231
    )
232
233
    # TODO remove after implementation of egon_building_id
234
    df.rename(columns={"id": "egon_building_id"}, inplace=True)
235
236
    # get max number of building ids from synthetic residential table
237
    with db.session_scope() as session:
238
        max_synth_residential_id = session.execute(
239
            func.max(OsmBuildingsSynthetic.id)
240
        ).scalar()
241
    max_synth_residential_id = int(max_synth_residential_id)
242
243
    # create sequential ids
244
    df["egon_building_id"] = range(
245
        max_synth_residential_id + 1,
246
        max_synth_residential_id + df.shape[0] + 1,
247
    )
248
249
    df["area"] = df["geom_building"].area
250
    # set building type of synthetic building
251
    df["building"] = "cts"
252
    # TODO remove in #772
253
    df = df.rename(
254
        columns={
255
            # "zensus_population_id": "cell_id",
256
            "egon_building_id": "id",
257
        }
258
    )
259
    return df
260
261
262
def buildings_with_amenities():
263
    """
264
    Amenities which are assigned to buildings are determined
265
    and grouped per building and zensus cell. Buildings
266
    covering multiple cells therefore exist multiple times
267
    but in different zensus cells. This is necessary to cover
268
    all cells with a cts demand. If buildings exist in multiple
269
    substations, their amenities are summed and assigned and kept in
270
    one substation only. If as a result, a census cell is uncovered,
271
    a synthetic amenity is placed. The buildings are aggregated
272
    afterwards during the calculation of the profile_share.
273
274
    Returns
275
    -------
276
    df_buildings_with_amenities: gpd.GeoDataFrame
277
        Contains all buildings with amenities per zensus cell.
278
    df_lost_cells: gpd.GeoDataFrame
279
        Contains synthetic amenities in lost cells. Might be empty
280
    """
281
282
    from saio.openstreetmap import osm_amenities_in_buildings_filtered
283
284
    with db.session_scope() as session:
285
        cells_query = (
286
            session.query(
287
                osm_amenities_in_buildings_filtered,
288
                MapZensusGridDistricts.bus_id,
289
            )
290
            .filter(
291
                MapZensusGridDistricts.zensus_population_id
292
                == osm_amenities_in_buildings_filtered.zensus_population_id
293
            )
294
            .filter(
295
                EgonDemandRegioZensusElectricity.zensus_population_id
296
                == osm_amenities_in_buildings_filtered.zensus_population_id
297
            )
298
            .filter(
299
                EgonDemandRegioZensusElectricity.sector == "service",
300
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
301
            )
302
        )
303
    df_amenities_in_buildings = pd.read_sql(
304
        cells_query.statement, cells_query.session.bind, index_col=None
305
    )
306
307
    df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
308
        "geom_building"
309
    ].apply(to_shape)
310
    df_amenities_in_buildings["geom_amenity"] = df_amenities_in_buildings[
311
        "geom_amenity"
312
    ].apply(to_shape)
313
314
    df_amenities_in_buildings["n_amenities_inside"] = 1
315
316
    # add identifier column for buildings in multiple substations
317
    df_amenities_in_buildings[
318
        "duplicate_identifier"
319
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
320
        "n_amenities_inside"
321
    ].transform(
322
        "cumsum"
323
    )
324
    df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
325
        ["id", "duplicate_identifier"]
326
    )
327
    # sum amenities of buildings with multiple substations
328
    df_amenities_in_buildings[
329
        "n_amenities_inside"
330
    ] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
331
        "n_amenities_inside"
332
    ].transform(
333
        "sum"
334
    )
335
336
    # create column to always go for bus_id with max amenities
337
    df_amenities_in_buildings[
338
        "max_amenities"
339
    ] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
340
        "n_amenities_inside"
341
    ].transform(
342
        "sum"
343
    )
344
    # sort to go for
345
    df_amenities_in_buildings.sort_values(
346
        ["id", "max_amenities"], ascending=False, inplace=True
347
    )
348
349
    # identify lost zensus cells
350
    df_lost_cells = df_amenities_in_buildings.loc[
351
        df_amenities_in_buildings.duplicated(
352
            subset=["id", "duplicate_identifier"], keep="first"
353
        )
354
    ]
355
    df_lost_cells.drop_duplicates(
356
        subset=["zensus_population_id"], inplace=True
357
    )
358
359
    # drop buildings with multiple substation and lower max amenity
360
    df_amenities_in_buildings.drop_duplicates(
361
        subset=["id", "duplicate_identifier"], keep="first", inplace=True
362
    )
363
364
    # check if lost zensus cells are already covered
365
    if not df_lost_cells.empty:
366
        if not (
367
            df_amenities_in_buildings["zensus_population_id"]
368
            .isin(df_lost_cells["zensus_population_id"])
369
            .empty
370
        ):
371
            # query geom data for cell if not
372
            with db.session_scope() as session:
373
                cells_query = session.query(
374
                    DestatisZensusPopulationPerHa.id,
375
                    DestatisZensusPopulationPerHa.geom,
376
                ).filter(
377
                    DestatisZensusPopulationPerHa.id.in_(
378
                        df_lost_cells["zensus_population_id"]
379
                    )
380
                )
381
382
            df_lost_cells = gpd.read_postgis(
383
                cells_query.statement,
384
                cells_query.session.bind,
385
                geom_col="geom",
386
            )
387
            # TODO maybe adapt method
388
            # place random amenity in cell
389
            df_lost_cells["n_amenities_inside"] = 1
390
            df_lost_cells.rename(
391
                columns={
392
                    "id": "zensus_population_id",
393
                },
394
                inplace=True,
395
            )
396
            df_lost_cells = place_buildings_with_amenities(
397
                df_lost_cells, amenities=1
398
            )
399
            df_lost_cells.rename(
400
                columns={
401
                    # "id": "zensus_population_id",
402
                    "geom_point": "geom_amenity",
403
                },
404
                inplace=True,
405
            )
406
            df_lost_cells.drop(
407
                columns=["building_count", "n_amenities_inside"], inplace=True
408
            )
409
        else:
410
            df_lost_cells = None
411
    else:
412
        df_lost_cells = None
413
414
    # drop helper columns
415
    df_amenities_in_buildings.drop(
416
        columns=["duplicate_identifier"], inplace=True
417
    )
418
419
    # sum amenities per building and cell
420
    df_amenities_in_buildings[
421
        "n_amenities_inside"
422
    ] = df_amenities_in_buildings.groupby(["zensus_population_id", "id"])[
423
        "n_amenities_inside"
424
    ].transform(
425
        "sum"
426
    )
427
    # drop duplicated buildings
428
    df_buildings_with_amenities = df_amenities_in_buildings.drop_duplicates(
429
        ["id", "zensus_population_id"]
430
    )
431
    df_buildings_with_amenities.reset_index(inplace=True, drop=True)
432
433
    df_buildings_with_amenities = df_buildings_with_amenities[
434
        ["id", "zensus_population_id", "geom_building", "n_amenities_inside"]
435
    ]
436
    df_buildings_with_amenities.rename(
437
        columns={
438
            # "zensus_population_id": "cell_id",
439
            "egon_building_id": "id"
440
        },
441
        inplace=True,
442
    )
443
444
    return df_buildings_with_amenities, df_lost_cells
445
446
447
def buildings_without_amenities():
448
    """
449
    Buildings (filtered and synthetic) in cells with
450
    cts demand but no amenities are determined.
451
452
    Returns
453
    -------
454
    df_buildings_without_amenities: gpd.GeoDataFrame
455
        Table of buildings without amenities in zensus cells
456
        with cts demand.
457
    """
458
    from saio.boundaries import egon_map_zensus_buildings_filtered_all
459
    from saio.openstreetmap import (
460
        osm_amenities_shops_filtered,
461
        osm_buildings_filtered,
462
        osm_buildings_synthetic,
463
    )
464
465
    # buildings_filtered in cts-demand-cells without amenities
466
    with db.session_scope() as session:
467
468
        # Synthetic Buildings
469
        q_synth_buildings = session.query(
470
            osm_buildings_synthetic.cell_id.cast(Integer).label(
471
                "zensus_population_id"
472
            ),
473
            osm_buildings_synthetic.id.cast(Integer).label("id"),
474
            osm_buildings_synthetic.area.label("area"),
475
            osm_buildings_synthetic.geom_building.label("geom_building"),
476
            osm_buildings_synthetic.geom_point.label("geom_point"),
477
        )
478
479
        # Buildings filtered
480
        q_buildings_filtered = session.query(
481
            egon_map_zensus_buildings_filtered_all.zensus_population_id,
482
            osm_buildings_filtered.id,
483
            osm_buildings_filtered.area,
484
            osm_buildings_filtered.geom_building,
485
            osm_buildings_filtered.geom_point,
486
        ).filter(
487
            osm_buildings_filtered.id
488
            == egon_map_zensus_buildings_filtered_all.id
489
        )
490
491
        # Amenities + zensus_population_id
492
        q_amenities = (
493
            session.query(
494
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
495
            )
496
            .filter(
497
                func.st_within(
498
                    osm_amenities_shops_filtered.geom_amenity,
499
                    DestatisZensusPopulationPerHa.geom,
500
                )
501
            )
502
            .distinct(DestatisZensusPopulationPerHa.id)
503
        )
504
505
        # Cells with CTS demand but without amenities
506
        q_cts_without_amenities = (
507
            session.query(
508
                EgonDemandRegioZensusElectricity.zensus_population_id,
509
            )
510
            .filter(
511
                EgonDemandRegioZensusElectricity.sector == "service",
512
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
513
            )
514
            .filter(
515
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
516
                    q_amenities
517
                )
518
            )
519
            .distinct()
520
        )
521
522
        # Buildings filtered + synthetic buildings residential in
523
        # cells with CTS demand but without amenities
524
        cells_query = q_synth_buildings.union(q_buildings_filtered).filter(
525
            egon_map_zensus_buildings_filtered_all.zensus_population_id.in_(
526
                q_cts_without_amenities
527
            )
528
        )
529
530
    # df_buildings_without_amenities = pd.read_sql(
531
    #     cells_query.statement, cells_query.session.bind, index_col=None)
532
    df_buildings_without_amenities = gpd.read_postgis(
533
        cells_query.statement,
534
        cells_query.session.bind,
535
        geom_col="geom_building",
536
    )
537
538
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
539
        columns={
540
            # "zensus_population_id": "cell_id",
541
            "egon_building_id": "id",
542
        }
543
    )
544
545
    return df_buildings_without_amenities
546
547
548
def select_cts_buildings(df_buildings_wo_amenities, max_n):
549
    """
550
    N Buildings (filtered and synthetic) in each cell with
551
    cts demand are selected. Only the first n buildings
552
    are taken for each cell. The buildings are sorted by surface
553
    area.
554
555
    Returns
556
    -------
557
    df_buildings_with_cts_demand: gpd.GeoDataFrame
558
        Table of buildings
559
    """
560
561
    df_buildings_wo_amenities.sort_values(
562
        "area", ascending=False, inplace=True
563
    )
564
    # select first n ids each census cell if available
565
    df_buildings_with_cts_demand = (
566
        df_buildings_wo_amenities.groupby("zensus_population_id")
567
        .nth(list(range(max_n)))
568
        .reset_index()
569
    )
570
    df_buildings_with_cts_demand.reset_index(drop=True, inplace=True)
571
572
    return df_buildings_with_cts_demand
573
574
575
def cells_with_cts_demand_only(df_buildings_without_amenities):
576
    """
577
    Cells with cts demand but no amenities or buildings
578
    are determined.
579
580
    Returns
581
    -------
582
    df_cells_only_cts_demand: gpd.GeoDataFrame
583
        Table of cells with cts demand but no amenities or buildings
584
    """
585
    from saio.openstreetmap import osm_amenities_shops_filtered
586
587
    # cells with amenities
588
    with db.session_scope() as session:
589
        sub_query = (
590
            session.query(
591
                DestatisZensusPopulationPerHa.id.label("zensus_population_id"),
592
            )
593
            .filter(
594
                func.st_within(
595
                    osm_amenities_shops_filtered.geom_amenity,
596
                    DestatisZensusPopulationPerHa.geom,
597
                )
598
            )
599
            .distinct(DestatisZensusPopulationPerHa.id)
600
        )
601
602
        cells_query = (
603
            session.query(
604
                EgonDemandRegioZensusElectricity.zensus_population_id,
605
                EgonDemandRegioZensusElectricity.scenario,
606
                EgonDemandRegioZensusElectricity.sector,
607
                EgonDemandRegioZensusElectricity.demand,
608
                DestatisZensusPopulationPerHa.geom,
609
            )
610
            .filter(
611
                EgonDemandRegioZensusElectricity.sector == "service",
612
                EgonDemandRegioZensusElectricity.scenario == "eGon2035",
613
            )
614
            .filter(
615
                EgonDemandRegioZensusElectricity.zensus_population_id.notin_(
616
                    sub_query
617
                )
618
            )
619
            .filter(
620
                EgonDemandRegioZensusElectricity.zensus_population_id
621
                == DestatisZensusPopulationPerHa.id
622
            )
623
        )
624
625
    df_cts_cell_without_amenities = gpd.read_postgis(
626
        cells_query.statement,
627
        cells_query.session.bind,
628
        geom_col="geom",
629
        index_col=None,
630
    )
631
632
    # TODO maybe remove
633
    df_buildings_without_amenities = df_buildings_without_amenities.rename(
634
        columns={"cell_id": "zensus_population_id"}
635
    )
636
637
    # Census cells with only cts demand
638
    df_cells_only_cts_demand = df_cts_cell_without_amenities.loc[
639
        ~df_cts_cell_without_amenities["zensus_population_id"].isin(
640
            df_buildings_without_amenities["zensus_population_id"].unique()
641
        )
642
    ]
643
644
    df_cells_only_cts_demand.reset_index(drop=True, inplace=True)
645
646
    return df_cells_only_cts_demand
647
648
649
def calc_census_cell_share(scenario="eGon2035", sector="electricity"):
650
    """
651
    The profile share for each census cell is calculated by its
652
    share of annual demand per substation bus. The annual demand
653
    per cell is defined by DemandRegio/Peta5. The share is identical for
654
    both scenarios as the annual demand is scaled linearly.
655
656
    Parameters
657
    ----------
658
    scenario: str
659
        Scenario for which the share is calculated.
660
    sector: str
661
        Sector for which the share is calculated.
662
663
    Returns
664
    -------
665
    df_census_share: pd.DataFrame
666
    """
667
    if sector == "electricity":
668
        demand_table = EgonDemandRegioZensusElectricity
669
    elif sector == "heat":
670
        demand_table = EgonPetaHeat
671
672
    with db.session_scope() as session:
673
        cells_query = (
674
            session.query(demand_table, MapZensusGridDistricts.bus_id)
Issue introduced by this change: The variable demand_table does not seem to be defined for all execution paths.
675
            .filter(demand_table.sector == "service")
676
            .filter(demand_table.scenario == scenario)
677
            .filter(
678
                demand_table.zensus_population_id
679
                == MapZensusGridDistricts.zensus_population_id
680
            )
681
        )
682
683
    df_demand = pd.read_sql(
684
        cells_query.statement,
685
        cells_query.session.bind,
686
        index_col="zensus_population_id",
687
    )
688
689
    # get demand share of cell per bus
690
    df_census_share = df_demand["demand"] / df_demand.groupby("bus_id")[
691
        "demand"
692
    ].transform("sum")
693
    df_census_share = df_census_share.rename("cell_share")
694
695
    df_census_share = pd.concat(
696
        [
697
            df_census_share,
698
            df_demand[["bus_id", "scenario"]],
699
        ],
700
        axis=1,
701
    )
702
703
    df_census_share.reset_index(inplace=True)
704
    return df_census_share
705
706
707
def calc_building_demand_profile_share(
708
    df_cts_buildings, scenario="eGon2035", sector="electricity"
709
):
710
    """
711
    Share of the cts demand profile per bus for every selected building
712
    is calculated. Building-amenity share is multiplied with census cell share
713
    to get the substation bus profile share for each building. The share is
714
    grouped and aggregated per building as some cover multiple cells.
715
716
    Parameters
717
    ----------
718
    df_cts_buildings: gpd.GeoDataFrame
719
        Table of all buildings with cts demand assigned
720
    scenario: str
721
        Scenario for which the share is calculated.
722
    sector: str
723
        Sector for which the share is calculated.
724
725
    Returns
726
    -------
727
    df_building_share: pd.DataFrame
728
        Table of bus profile share per building
729
730
    """
731
732
    def calc_building_amenity_share(df_cts_buildings):
733
        """
734
        Calculate the building share by the number of amenities per building
735
        within a census cell.
736
        """
737
        df_building_amenity_share = df_cts_buildings[
738
            "n_amenities_inside"
739
        ] / df_cts_buildings.groupby("zensus_population_id")[
740
            "n_amenities_inside"
741
        ].transform(
742
            "sum"
743
        )
744
        df_building_amenity_share = pd.concat(
745
            [
746
                df_building_amenity_share.rename("building_amenity_share"),
747
                df_cts_buildings[["zensus_population_id", "id"]],
748
            ],
749
            axis=1,
750
        )
751
        return df_building_amenity_share
752
753
    df_building_amenity_share = calc_building_amenity_share(df_cts_buildings)
754
755
    df_census_cell_share = calc_census_cell_share(
756
        scenario=scenario, sector=sector
757
    )
758
759
    df_demand_share = pd.merge(
760
        left=df_building_amenity_share,
761
        right=df_census_cell_share,
762
        left_on="zensus_population_id",
763
        right_on="zensus_population_id",
764
    )
765
    df_demand_share["profile_share"] = df_demand_share[
766
        "building_amenity_share"
767
    ].multiply(df_demand_share["cell_share"])
768
769
    df_demand_share = df_demand_share[
770
        ["id", "bus_id", "scenario", "profile_share"]
771
    ]
772
    # TODO adapt groupby?
773
    # Group and aggregate per building for multi cell buildings
774
    df_demand_share = (
775
        df_demand_share.groupby(["scenario", "id", "bus_id"])
776
        .sum()
777
        .reset_index()
778
    )
779
    if df_demand_share.duplicated("id", keep=False).any():
780
        print(
781
            df_demand_share.loc[df_demand_share.duplicated("id", keep=False)]
782
        )
783
    return df_demand_share
784
785
786
def calc_building_profiles(
787
    egon_building_id=None,
788
    bus_id=None,
789
    scenario="eGon2035",
790
    sector="electricity",
791
):
792
    """
793
    Calculate the demand profile for each building. The profile is
794
    calculated by the demand share of the building per substation bus.
795
796
    Parameters
797
    ----------
798
    egon_building_id: int
799
        Id of the building for which the profile is calculated.
800
        If not given, the profiles are calculated for all buildings.
801
    bus_id: int
802
        Id of the substation for which all profiles are calculated.
803
        If not given, the profiles are calculated for all buildings.
804
    scenario: str
805
        Scenario for which the share is calculated.
806
    sector: str
807
        Sector for which the share is calculated.
808
809
    Returns
810
    -------
811
    df_building_profiles: pd.DataFrame
812
        Table of demand profile per building
813
    """
814
    if sector == "electricity":
815
        with db.session_scope() as session:
816
            cells_query = session.query(
817
                EgonCtsElectricityDemandBuildingShare,
818
            ).filter(
819
                EgonCtsElectricityDemandBuildingShare.scenario == scenario
820
            )
821
822
        df_demand_share = pd.read_sql(
823
            cells_query.statement, cells_query.session.bind, index_col=None
824
        )
825
826
        # TODO maybe use demand.egon_etrago_electricity_cts
827
        # with db.session_scope() as session:
828
        #     cells_query = (
829
        #         session.query(
830
        #             EgonEtragoElectricityCts
831
        #         ).filter(
832
        #             EgonEtragoElectricityCts.scn_name == scenario)
833
        #     )
834
        #
835
        # df_cts_profiles = pd.read_sql(
836
        #     cells_query.statement,
837
        #     cells_query.session.bind,
838
        # )
839
        # df_cts_profiles = pd.DataFrame.from_dict(
840
        #   df_cts_profiles.set_index('bus_id')['p_set'].to_dict(),
841
        #   orient="index")
842
        df_cts_profiles = calc_load_curves_cts(scenario)
843
844
    elif sector == "heat":
845
        with db.session_scope() as session:
846
            cells_query = session.query(
847
                EgonCtsHeatDemandBuildingShare,
848
            ).filter(EgonCtsHeatDemandBuildingShare.scenario == scenario)
849
850
        df_demand_share = pd.read_sql(
851
            cells_query.statement, cells_query.session.bind, index_col=None
852
        )
853
854
        # TODO cts heat substation profiles missing
855
856
    # get demand share of selected building id
857
    if isinstance(egon_building_id, int):
858
        if egon_building_id in df_demand_share["id"].values:
Issue introduced by this change: The variable df_demand_share does not seem to be defined for all execution paths.
859
            df_demand_share = df_demand_share.loc[
860
                df_demand_share["id"] == egon_building_id
861
            ]
862
        else:
863
            raise KeyError(f"Building with id {egon_building_id} not found")
864
    # TODO maybe add list
865
    # elif isinstance(egon_building_id, list):
866
867
    # get demand share of all buildings for selected bus id
868
    if isinstance(bus_id, int):
869
        if bus_id in df_demand_share["bus_id"].values:
870
            df_demand_share = df_demand_share.loc[
871
                df_demand_share["bus_id"] == bus_id
872
            ]
873
        else:
874
            raise KeyError(f"Bus with id {bus_id} not found")
875
876
    # get demand profile for all buildings for selected demand share
877
    # TODO takes a few seconds per iteration
878
    df_building_profiles = pd.DataFrame()
879
    for bus_id, df in df_demand_share.groupby("bus_id"):
880
        shares = df.set_index("id", drop=True)["profile_share"]
881
        profile = df_cts_profiles.loc[:, bus_id]
Issue introduced by this change: The variable df_cts_profiles does not seem to be defined for all execution paths.
882
        # building_profiles = profile.apply(lambda x: x * shares)
883
        building_profiles = np.outer(profile, shares)
884
        building_profiles = pd.DataFrame(
885
            building_profiles, index=profile.index, columns=shares.index
886
        )
887
        df_building_profiles = pd.concat(
888
            [df_building_profiles, building_profiles], axis=1
889
        )
890
891
    return df_building_profiles
892
893
894
def delete_synthetic_cts_buildings():
895
    """
896
    All synthetic cts buildings are deleted from the DB. This is necessary if
897
    the task is run multiple times as the existing synthetic buildings
898
    influence the results.
899
    """
900
    # import db tables
901
    from saio.openstreetmap import osm_buildings_synthetic
902
903
    # remove all synthetic cts buildings
904
    with db.session_scope() as session:
905
        session.query(osm_buildings_synthetic).filter(
906
            osm_buildings_synthetic.building == "cts"
907
        ).delete()
908
909
910
def cts_buildings():
911
    """
912
    Assigns CTS demand to buildings and calculates the respective demand
913
    profiles. The demand profile per substation is disaggregated by the
914
    annual demand share of each census cell and by the number of amenities
915
    per building within the cell. If no building data is available,
916
    synthetic buildings are generated around the amenities. If no amenities
917
    but cts demand is available, buildings are randomly selected. If no
918
    building nor amenity is available, random synthetic buildings are
919
    generated. The demand share is stored in the database.
920
921
    Notes
922
    -----
923
    Cells with CTS demand, amenities and buildings do not change within
924
    the scenarios, only the demand itself. Therefore scenario eGon2035
925
    can be used universally to determine the cts buildings but not for
926
    the demand share.
927
    """
928
929
    log = start_logging()
930
    log.info("Start logging!")
931
    # Buildings with amenities
932
    df_buildings_with_amenities, df_lost_cells = buildings_with_amenities()
933
    log.info("Buildings with amenities selected!")
934
935
    # Median number of amenities per cell
936
    median_n_amenities = int(
937
        df_buildings_with_amenities.groupby("zensus_population_id")[
938
            "n_amenities_inside"
939
        ]
940
        .sum()
941
        .median()
942
    )
943
    # TODO remove
944
    print(f"Median amenity value: {median_n_amenities}")
945
946
    # Remove synthetic CTS buildings if existing
947
    delete_synthetic_cts_buildings()
948
    log.info("Old synthetic cts buildings deleted!")
949
950
    # Amenities not assigned to buildings
951
    df_amenities_without_buildings = amenities_without_buildings()
952
    log.info("Amenities without buildlings selected!")
953
954
    # Append lost cells due to duplicated ids, to cover all demand cells
955
    if df_lost_cells is not None and not df_lost_cells.empty:
956
957
        df_lost_cells["amenities"] = median_n_amenities
958
        # create row for every amenity
959
        df_lost_cells["amenities"] = (
960
            df_lost_cells["amenities"].astype(int).apply(range)
961
        )
962
        df_lost_cells = df_lost_cells.explode("amenities")
963
        df_lost_cells.drop(columns="amenities", inplace=True)
964
        df_amenities_without_buildings = df_amenities_without_buildings.append(
965
            df_lost_cells, ignore_index=True
966
        )
967
        log.info("Lost cells due to substation intersection appended!")
968
969
    # One building per amenity
970
    df_amenities_without_buildings["n_amenities_inside"] = 1
971
    # Create synthetic buildings for amenities without buildings
972
    df_synthetic_buildings_with_amenities = create_synthetic_buildings(
973
        df_amenities_without_buildings, points="geom_amenity"
974
    )
975
    log.info("Synthetic buildings created!")
976
977
    # TODO write to DB and remove renaming
978
    write_table_to_postgis(
979
        df_synthetic_buildings_with_amenities.rename(
980
            columns={
981
                "zensus_population_id": "cell_id",
982
                "egon_building_id": "id",
983
            }
984
        ),
985
        OsmBuildingsSynthetic,
986
        drop=False,
987
    )
988
    log.info("Synthetic buildings exported to DB!")
989
990
    # Cells without amenities but CTS demand and buildings
991
    df_buildings_without_amenities = buildings_without_amenities()
992
    log.info("Buildings without amenities in demand cells identified!")
993
994
    # TODO Fix Adhoc Bugfix duplicated buildings
995
    # drop building ids which have already been used
996
    mask = df_buildings_without_amenities.loc[
997
        df_buildings_without_amenities["id"].isin(
998
            df_buildings_with_amenities["id"]
999
        )
1000
    ].index
1001
    df_buildings_without_amenities = df_buildings_without_amenities.drop(
1002
        index=mask
1003
    ).reset_index(drop=True)
1004
    log.info(f"{len(mask)} duplicated ids removed!")
1005
1006
    # select median n buildings per cell
1007
    df_buildings_without_amenities = select_cts_buildings(
1008
        df_buildings_without_amenities, max_n=median_n_amenities
1009
    )
1010
    df_buildings_without_amenities["n_amenities_inside"] = 1
1011
    log.info(f"{median_n_amenities} buildings per cell selected!")
1012
1013
    # Create synthetic amenities and buildings in cells with only CTS demand
1014
    df_cells_with_cts_demand_only = cells_with_cts_demand_only(
1015
        df_buildings_without_amenities
1016
    )
1017
    log.info("Cells with only demand identified!")
1018
1019
    # Median n Amenities per cell
1020
    df_cells_with_cts_demand_only["amenities"] = median_n_amenities
1021
    # create row for every amenity
1022
    df_cells_with_cts_demand_only["amenities"] = (
1023
        df_cells_with_cts_demand_only["amenities"].astype(int).apply(range)
1024
    )
1025
    df_cells_with_cts_demand_only = df_cells_with_cts_demand_only.explode(
1026
        "amenities"
1027
    )
1028
    df_cells_with_cts_demand_only.drop(columns="amenities", inplace=True)
1029
1030
    # Only 1 Amenity per Building
1031
    df_cells_with_cts_demand_only["n_amenities_inside"] = 1
1032
    df_cells_with_cts_demand_only = place_buildings_with_amenities(
1033
        df_cells_with_cts_demand_only, amenities=1
1034
    )
1035
    df_synthetic_buildings_without_amenities = create_synthetic_buildings(
1036
        df_cells_with_cts_demand_only, points="geom_point"
1037
    )
1038
    log.info(f"{median_n_amenities} synthetic buildings per cell created")
1039
1040
    # TODO write to DB and remove (backup) renaming
1041
    write_table_to_postgis(
1042
        df_synthetic_buildings_without_amenities.rename(
1043
            columns={
1044
                "zensus_population_id": "cell_id",
1045
                "egon_building_id": "id",
1046
            }
1047
        ),
1048
        OsmBuildingsSynthetic,
1049
        drop=False,
1050
    )
1051
    log.info("Synthetic buildings exported to DB")
1052
1053
    # Concat all buildings
1054
    columns = [
1055
        "zensus_population_id",
1056
        "id",
1057
        "geom_building",
1058
        "n_amenities_inside",
1059
        "source",
1060
    ]
1061
1062
    df_buildings_with_amenities["source"] = "bwa"
1063
    df_synthetic_buildings_with_amenities["source"] = "sbwa"
1064
    df_buildings_without_amenities["source"] = "bwoa"
1065
    df_synthetic_buildings_without_amenities["source"] = "sbwoa"
1066
1067
    df_cts_buildings = pd.concat(
1068
        [
1069
            df_buildings_with_amenities[columns],
1070
            df_synthetic_buildings_with_amenities[columns],
1071
            df_buildings_without_amenities[columns],
1072
            df_synthetic_buildings_without_amenities[columns],
1073
        ],
1074
        axis=0,
1075
        ignore_index=True,
1076
    )
1077
    # TODO maybe remove after #772
1078
    df_cts_buildings["id"] = df_cts_buildings["id"].astype(int)
1079
1080
    # Write table to db for debugging
1081
    # TODO remove later
1082
    df_cts_buildings = gpd.GeoDataFrame(
1083
        df_cts_buildings, geometry="geom_building", crs=3035
1084
    )
1085
    df_cts_buildings = df_cts_buildings.reset_index().rename(
1086
        columns={"index": "serial"}
1087
    )
1088
    write_table_to_postgis(
1089
        df_cts_buildings,
1090
        CtsBuildings,
1091
        drop=True,
1092
    )
1093
    log.info("CTS buildings exported to DB!")
1094
1095
1096
def cts_electricity():
Duplication introduced by this change: This code seems to be duplicated in your project.
1097
    """
1098
    Calculate the cts electricity demand share of the hvmv substation profile
1099
    for buildings.
1100
    """
1101
    log = start_logging()
1102
    log.info("Start logging!")
1103
    with db.session_scope() as session:
1104
        cells_query = session.query(CtsBuildings)
1105
1106
    df_cts_buildings = pd.read_sql(
1107
        cells_query.statement, cells_query.session.bind, index_col=None
1108
    )
1109
    log.info("CTS buildings from DB imported!")
1110
    df_demand_share_2035 = calc_building_demand_profile_share(
1111
        df_cts_buildings, scenario="eGon2035", sector="electricity"
1112
    )
1113
    log.info("Profile share for egon2035 calculated!")
1114
    df_demand_share_100RE = calc_building_demand_profile_share(
1115
        df_cts_buildings, scenario="eGon100RE", sector="electricity"
1116
    )
1117
    log.info("Profile share for egon100RE calculated!")
1118
    df_demand_share = pd.concat(
1119
        [df_demand_share_2035, df_demand_share_100RE],
1120
        axis=0,
1121
        ignore_index=True,
1122
    )
1123
1124
    write_table_to_postgres(
1125
        df_demand_share, EgonCtsElectricityDemandBuildingShare, drop=True
1126
    )
1127
    log.info("Profile share exported to DB!")
1128
1129
1130
def cts_heat():
Duplication introduced by this change: This code seems to be duplicated in your project.
1131
    """
1132
    Calculate the cts heat demand share of the hvmv substation profile
1133
    for buildings.
1134
    """
1135
    log = start_logging()
1136
    log.info("Start logging!")
1137
    with db.session_scope() as session:
1138
        cells_query = session.query(CtsBuildings)
1139
1140
    df_cts_buildings = pd.read_sql(
1141
        cells_query.statement, cells_query.session.bind, index_col=None
1142
    )
1143
    log.info("CTS buildings from DB imported!")
1144
1145
    df_demand_share_2035 = calc_building_demand_profile_share(
1146
        df_cts_buildings, scenario="eGon2035", sector="heat"
1147
    )
1148
    log.info("Profile share for egon2035 calculated!")
1149
    df_demand_share_100RE = calc_building_demand_profile_share(
1150
        df_cts_buildings, scenario="eGon100RE", sector="heat"
1151
    )
1152
    log.info("Profile share for egon100RE calculated!")
1153
    df_demand_share = pd.concat(
1154
        [df_demand_share_2035, df_demand_share_100RE],
1155
        axis=0,
1156
        ignore_index=True,
1157
    )
1158
1159
    write_table_to_postgres(
1160
        df_demand_share, EgonCtsHeatDemandBuildingShare, drop=True
1161
    )
1162
    log.info("Profile share exported to DB!")
1163
1164
1165
def get_cts_electricity_peak_load():
1166
    """
1167
    Get peak load of all CTS buildings for both scenarios and store in DB.
1168
    """
1169
    log = start_logging()
1170
    log.info("Start logging!")
1171
    # Delete rows with cts demand
1172
    with db.session_scope() as session:
1173
        session.query(BuildingPeakLoads).filter(
1174
            BuildingPeakLoads.sector == "cts"
1175
        ).delete()
1176
    log.info("CTS Peak load removed from DB!")
1177
1178
    for scenario in ["eGon2035", "eGon100RE"]:
1179
        df_building_profiles = calc_building_profiles(scenario=scenario)
1180
        log.info(f"Profiles for {scenario} calculated!")
1181
        df_peak_load = df_building_profiles.max(axis=0).rename(scenario)
1182
        log.info("Peak load determined!")
1183
        df_peak_load = df_peak_load.reset_index()
1184
1185
        # TODO rename table column to egon_building_id
1186
        df_peak_load.rename(columns={"id": "building_id"}, inplace=True)
1187
        df_peak_load["sector"] = "cts"
1188
        df_peak_load = df_peak_load.melt(
1189
            id_vars=["building_id", "sector"],
1190
            var_name="scenario",
1191
            value_name="peak_load_in_w",
1192
        )
1193
        # Convert unit to W
1194
        df_peak_load["peak_load_in_w"] = df_peak_load["peak_load_in_w"] * 1e6
1195
1196
        # Write peak loads into db
1197
        with db.session_scope() as session:
1198
            session.bulk_insert_mappings(
1199
                BuildingPeakLoads,
1200
                df_peak_load.to_dict(orient="records"),
1201
            )
1202
        log.info("Peak load exported to DB!")
1203
1204
1205
class CtsElectricityBuildings(Dataset):
1206
    def __init__(self, dependencies):
1207
        super().__init__(
1208
            name="CtsElectricityBuildings",
1209
            version="0.0.0",
1210
            dependencies=dependencies,
1211
            tasks=(
1212
                cts_buildings,
1213
                {cts_electricity, cts_heat},
1214
                get_cts_electricity_peak_load,
1215
            ),
1216
        )
1217