Completed
Push — dev ( 8582b4...82307e )
by
unknown
30s queued 19s
created

reduce_synthetic_buildings()   A

Complexity

Conditions 2

Size

Total Lines 43
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 43
rs 9.75
c 0
b 0
f 0
cc 2
nop 2
1
"""
2
Household electricity demand time series for scenarios in 2035 and 2050
3
assigned to OSM-buildings.
4
5
"""
6
7
import random
8
9
from geoalchemy2 import Geometry
10
from sqlalchemy import REAL, Column, Integer, String, Table, func, inspect
11
from sqlalchemy.ext.declarative import declarative_base
12
import geopandas as gpd
13
import numpy as np
14
import pandas as pd
15
16
from egon.data import db
17
from egon.data.datasets import Dataset
18
from egon.data.datasets.electricity_demand_timeseries.hh_profiles import (
19
    HouseholdElectricityProfilesInCensusCells,
20
    get_iee_hh_demand_profiles_raw,
21
)
22
from egon.data.datasets.electricity_demand_timeseries.tools import (
23
    random_point_in_square,
24
)
25
import egon.data.config
26
27
engine = db.engine()
28
Base = declarative_base()
29
30
data_config = egon.data.config.datasets()
31
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
32
np.random.seed(RANDOM_SEED)
33
34
35
class HouseholdElectricityProfilesOfBuildings(Base):
36
    """
37
    Class definition of table demand.egon_household_electricity_profile_of_buildings.
38
39
    Mapping of demand timeseries and buildings and cell_id. This table is created within
40
    :py:func:`hh_buildings.map_houseprofiles_to_buildings()`.
41
42
    """
43
44
    __tablename__ = "egon_household_electricity_profile_of_buildings"
45
    __table_args__ = {"schema": "demand"}
46
47
    id = Column(Integer, primary_key=True)
48
    building_id = Column(Integer, index=True)
49
    cell_id = Column(Integer, index=True)
50
    profile_id = Column(String, index=True)
51
52
53
class HouseholdElectricityProfilesOfBuildingsStats(Base):
54
    """
55
    Class definition of table `demand.egon_household_electricity_profile_of_buildings_stats`.
56
    Contains number of households per building and type from table
57
    `demand.egon_household_electricity_profile_of_buildings`
58
59
    Columns
60
    -------
61
    building_id: Building id as used in tables `openstreetmap.osm_buildings_*`, index col
62
    households_total: total count of households
63
    SR: count of household type SR single retiree
64
    SO: count of household type SA single adults
65
    PR: count of household type PR pair retiree
66
    PO: count of household type PA pair adults
67
    SK: count of household type SK single n children
68
    P1: count of household type P1 pair 1 child
69
    P2: count of household type P2 pair 2 children
70
    P3: count of household type P3 pair 3 children
71
    OR: count of household type OR multi retiree n children
72
    OO: count of household type OO multi adults n children
73
    """
74
75
    __tablename__ = "egon_household_electricity_profile_of_buildings_stats"
76
    __table_args__ = {"schema": "demand"}
77
78
    building_id = Column(Integer, primary_key=True)
79
    households_total = Column(Integer, nullable=True)
80
    SR = Column(Integer, nullable=True)
81
    SO = Column(Integer, nullable=True)
82
    PR = Column(Integer, nullable=True)
83
    PO = Column(Integer, nullable=True)
84
    SK = Column(Integer, nullable=True)
85
    P1 = Column(Integer, nullable=True)
86
    P2 = Column(Integer, nullable=True)
87
    P3 = Column(Integer, nullable=True)
88
    OR = Column(Integer, nullable=True)
89
    OO = Column(Integer, nullable=True)
90
91
92
class OsmBuildingsSynthetic(Base):
93
    """
94
    Class definition of table demand.osm_buildings_synthetic.
95
96
    Lists generated synthetic building with id, zensus_population_id and
97
    building type. This table is created within
98
    :py:func:`hh_buildings.map_houseprofiles_to_buildings()`.
99
    """
100
101
    __tablename__ = "osm_buildings_synthetic"
102
    __table_args__ = {"schema": "openstreetmap"}
103
104
    id = Column(String, primary_key=True)
105
    cell_id = Column(String, index=True)
106
    geom_building = Column(Geometry("Polygon", 3035), index=True)
107
    geom_point = Column(Geometry("POINT", 3035))
108
    n_amenities_inside = Column(Integer)
109
    building = Column(String(11))
110
    area = Column(REAL)
111
112
113
class BuildingElectricityPeakLoads(Base):
114
    """
115
    Class definition of table demand.egon_building_electricity_peak_loads.
116
117
    Mapping of electricity demand time series and buildings including cell_id,
118
    building area and peak load. This table is created within
119
    :func:`hh_buildings.get_building_peak_loads()`.
120
    """
121
122
    __tablename__ = "egon_building_electricity_peak_loads"
123
    __table_args__ = {"schema": "demand"}
124
125
    building_id = Column(Integer, primary_key=True)
126
    scenario = Column(String, primary_key=True)
127
    sector = Column(String, primary_key=True)
128
    peak_load_in_w = Column(REAL)
129
    voltage_level = Column(Integer, index=True)
130
131
132
def match_osm_and_zensus_data(
133
    egon_hh_profile_in_zensus_cell,
134
    egon_map_zensus_buildings_residential,
135
):
136
    """
137
    Compares OSM buildings and census hh demand profiles.
138
139
    OSM building data and hh demand profiles based on census data is compared.
140
    Census cells with only profiles but no osm-ids are identified to generate
141
    synthetic buildings. Census building count is used, if available, to define
142
    number of missing buildings. Otherwise, we use a twofold approach for the
143
    rate: first, the rate is calculated using adjacent cells (function
144
    `find_adjacent_cells()`), a distance of 3 cells in each direction is used
145
    by default (resulting in a 7x7 lookup matrix). As fallback, the overall
146
    median profile/building rate is used to derive the number of buildings
147
    from the number of already generated demand profiles.
148
149
    Parameters
150
    ----------
151
    egon_hh_profile_in_zensus_cell: pd.DataFrame
152
        Table mapping hh demand profiles to census cells
153
154
    egon_map_zensus_buildings_residential: pd.DataFrame
155
        Table with buildings osm-id and cell_id
156
157
    Returns
158
    -------
159
    pd.DataFrame
160
        Table with cell_ids and number of missing buildings
161
    """
162
163
    def find_adjacent_cells(row, adj_cell_radius):
164
        """
165
        Find adjacent cells for cell by iterating over census grid ids
166
        (100mN...E...).
167
168
        Parameters
169
        ----------
170
        row : Dataframe row
171
            Dataframe row
172
        adj_cell_radius : int
173
            distance of cells in each direction to find cells,
174
            e.g. adj_cell_radius=3 -> 7x7 cell matrix
175
176
        Returns
177
        -------
178
        tuples of int
179
            N coordinates, E coordinates in format
180
            [(N_cell_1, E_cell_1), ..., (N_cell_n, E_cell_n)]
181
        """
182
        return [
183
            f"100mN{_[0]}E{_[1]}"
184
            for _ in np.array(
185
                np.meshgrid(
186
                    np.arange(
187
                        row.N - adj_cell_radius, row.N + adj_cell_radius + 1
188
                    ),
189
                    np.arange(
190
                        row.E - adj_cell_radius, row.E + adj_cell_radius + 1
191
                    ),
192
                )
193
            ).T.reshape(-1, 2)
194
        ]
195
196
    # count number of profiles for each cell
197
    profiles_per_cell = egon_hh_profile_in_zensus_cell.cell_profile_ids.apply(
198
        len
199
    )
200
201
    # Add number of profiles per cell
202
    number_of_buildings_profiles_per_cell = pd.merge(
203
        left=profiles_per_cell,
204
        right=egon_hh_profile_in_zensus_cell["cell_id"],
205
        left_index=True,
206
        right_index=True,
207
    )
208
209
    # count buildings/ids for each cell
210
    buildings_per_cell = egon_map_zensus_buildings_residential.groupby(
211
        "cell_id"
212
    )["id"].count()
213
    buildings_per_cell = buildings_per_cell.rename("building_ids")
214
215
    # add buildings left join to have all the cells with assigned profiles
216
    number_of_buildings_profiles_per_cell = pd.merge(
217
        left=number_of_buildings_profiles_per_cell,
218
        right=buildings_per_cell,
219
        left_on="cell_id",
220
        right_index=True,
221
        how="left",
222
    )
223
224
    # identify cell ids with profiles but no buildings
225
    number_of_buildings_profiles_per_cell = (
226
        number_of_buildings_profiles_per_cell.fillna(0).astype(int)
227
    )
228
    missing_buildings = number_of_buildings_profiles_per_cell.loc[
229
        number_of_buildings_profiles_per_cell.building_ids == 0,
230
        ["cell_id", "cell_profile_ids"],
231
    ].set_index("cell_id")
232
233
    # query zensus building count
234
    egon_destatis_building_count = Table(
235
        "egon_destatis_zensus_apartment_building_population_per_ha",
236
        Base.metadata,
237
        schema="society",
238
    )
239
    # get table metadata from db by name and schema
240
    inspect(engine).reflecttable(egon_destatis_building_count, None)
241
242
    with db.session_scope() as session:
243
        cells_query = session.query(
244
            egon_destatis_building_count.c.zensus_population_id,
245
            egon_destatis_building_count.c.building_count,
246
        )
247
248
    egon_destatis_building_count = pd.read_sql(
249
        cells_query.statement,
250
        cells_query.session.bind,
251
        index_col="zensus_population_id",
252
    )
253
    egon_destatis_building_count = egon_destatis_building_count.dropna()
254
255
    missing_buildings = pd.merge(
256
        left=missing_buildings,
257
        right=egon_destatis_building_count,
258
        left_index=True,
259
        right_index=True,
260
        how="left",
261
    )
262
263
    # exclude cells without buildings
264
    only_cells_with_buildings = (
265
        number_of_buildings_profiles_per_cell["building_ids"] != 0
266
    )
267
    # get profile/building rate for each cell
268
    profile_building_rate = (
269
        number_of_buildings_profiles_per_cell.loc[
270
            only_cells_with_buildings, "cell_profile_ids"
271
        ]
272
        / number_of_buildings_profiles_per_cell.loc[
273
            only_cells_with_buildings, "building_ids"
274
        ]
275
    )
276
277
    # prepare values for missing building counts by number of profile ids
278
    building_count_fillna = missing_buildings.loc[
279
        missing_buildings["building_count"].isna(), "cell_profile_ids"
280
    ]
281
    # devide by median profile/building rate
282
    building_count_fillna = (
283
        building_count_fillna / profile_building_rate.median()
284
    )
285
    # replace missing building counts
286
    missing_buildings["building_count"] = missing_buildings[
287
        "building_count"
288
    ].fillna(value=building_count_fillna)
289
290
    # ========== START Update profile/building rate in cells w/o bld using adjacent cells ==========
291
    missing_buildings_temp = (
292
        egon_hh_profile_in_zensus_cell[["cell_id", "grid_id"]]
293
        .set_index("cell_id")
294
        .loc[missing_buildings.index.unique()]
295
    )
296
297
    # Extract coordinates
298
    missing_buildings_temp = pd.concat(
299
        [
300
            missing_buildings_temp,
301
            missing_buildings_temp.grid_id.str.extract(r"100mN(\d+)E(\d+)")
302
            .astype(int)
303
            .rename(columns={0: "N", 1: "E"}),
304
        ],
305
        axis=1,
306
    )
307
308
    # Find adjacent cells for cell
309
    missing_buildings_temp["cell_adj"] = missing_buildings_temp.apply(
310
        find_adjacent_cells, adj_cell_radius=3, axis=1
311
    )
312
    missing_buildings_temp = (
313
        missing_buildings_temp.explode("cell_adj")
314
        .drop(columns=["grid_id", "N", "E"])
315
        .reset_index()
316
    )
317
318
    # Create mapping table cell -> adjacent cells
319
    missing_buildings_temp = (
320
        missing_buildings_temp.set_index("cell_adj")
321
        .join(
322
            egon_hh_profile_in_zensus_cell.set_index("grid_id").cell_id,
323
            rsuffix="_adj",
324
        )
325
        .dropna()
326
        .set_index("cell_id_adj")
327
    )
328
329
    # Calculate profile/building rate for those cells
330
    profile_building_rate.name = "profile_building_rate"
331
    missing_buildings_temp = missing_buildings_temp.join(
332
        number_of_buildings_profiles_per_cell[["cell_id"]]
333
        .join(profile_building_rate)
334
        .set_index("cell_id")
335
    )
336
    missing_buildings_temp = (
337
        missing_buildings_temp.groupby("cell_id").median().dropna()
338
    )
339
340
    # Update mising buildings
341
    missing_buildings["building_count"] = (
342
        missing_buildings.cell_profile_ids.div(
343
            missing_buildings_temp.profile_building_rate
344
        ).fillna(missing_buildings.building_count)
345
    )
346
    # ========== END Update profile/building rate in cells w/o bld using adjacent cells ==========
347
348
    # ceil to have at least one building each cell and make type int
349
    missing_buildings = missing_buildings.apply(np.ceil).astype(int)
350
    # generate list of building ids for each cell
351
    missing_buildings["building_count"] = missing_buildings[
352
        "building_count"
353
    ].apply(range)
354
    missing_buildings = missing_buildings.explode(column="building_count")
355
356
    return missing_buildings
357
358
359
def generate_synthetic_buildings(missing_buildings, edge_length):
360
    """
361
    Generate synthetic square buildings in census cells for every entry
362
    in missing_buildings.
363
364
    Generate random placed synthetic buildings incl geom data within the bounds
365
    of the cencus cell. Buildings have each a square area with edge_length^2.
366
367
368
    Parameters
369
    ----------
370
    missing_buildings: pd.Series or pd.DataFrame
371
        Table with cell_ids and building number
372
    edge_length: int
373
        Edge length of square synthetic building in meter
374
375
    Returns
376
    -------
377
    pd.DataFrame
378
        Table with generated synthetic buildings, area, cell_id and geom data
379
380
    """
381
    destatis_zensus_population_per_ha_inside_germany = Table(
382
        "destatis_zensus_population_per_ha_inside_germany",
383
        Base.metadata,
384
        schema="society",
385
    )
386
    # get table metadata from db by name and schema
387
    inspect(engine).reflecttable(
388
        destatis_zensus_population_per_ha_inside_germany, None
389
    )
390
391
    with db.session_scope() as session:
392
        cells_query = session.query(
393
            destatis_zensus_population_per_ha_inside_germany
394
        ).filter(
395
            destatis_zensus_population_per_ha_inside_germany.c.id.in_(
396
                missing_buildings.index.unique()
397
            )
398
        )
399
400
    destatis_zensus_population_per_ha_inside_germany = gpd.read_postgis(
401
        cells_query.statement, cells_query.session.bind, index_col="id"
402
    )
403
404
    # add geom data of zensus cell
405
    missing_buildings_geom = pd.merge(
406
        left=destatis_zensus_population_per_ha_inside_germany[["geom"]],
407
        right=missing_buildings,
408
        left_index=True,
409
        right_index=True,
410
        how="right",
411
    )
412
413
    missing_buildings_geom = missing_buildings_geom.reset_index(drop=False)
414
    missing_buildings_geom = missing_buildings_geom.rename(
415
        columns={
416
            "building_count": "building_id",
417
            "cell_profile_ids": "profiles",
418
            "id": "cell_id",
419
        }
420
    )
421
422
    # create random points within census cells
423
    points = random_point_in_square(
424
        geom=missing_buildings_geom["geom"], tol=edge_length / 2
425
    )
426
427
    # Store center of poylon
428
    missing_buildings_geom["geom_point"] = points
429
    # Create building using a square around point
430
    missing_buildings_geom["geom_building"] = points.buffer(
431
        distance=edge_length / 2, cap_style=3
432
    )
433
    missing_buildings_geom = missing_buildings_geom.drop(columns=["geom"])
434
    missing_buildings_geom = gpd.GeoDataFrame(
435
        missing_buildings_geom, crs="EPSG:3035", geometry="geom_building"
436
    )
437
438
    # get table metadata from db by name and schema
439
    buildings = Table("osm_buildings", Base.metadata, schema="openstreetmap")
440
    inspect(engine).reflecttable(buildings, None)
441
442
    # get max number of building ids from non-filtered building table
443
    with db.session_scope() as session:
444
        buildings = session.execute(func.max(buildings.c.id)).scalar()
445
446
    # apply ids following the sequence of openstreetmap.osm_buildings id
447
    missing_buildings_geom["id"] = range(
448
        buildings + 1,
449
        buildings + len(missing_buildings_geom) + 1,
450
    )
451
452
    drop_columns = [
453
        i
454
        for i in ["building_id", "profiles"]
455
        if i in missing_buildings_geom.columns
456
    ]
457
    if drop_columns:
458
        missing_buildings_geom = missing_buildings_geom.drop(
459
            columns=drop_columns
460
        )
461
462
    missing_buildings_geom["building"] = "residential"
463
    missing_buildings_geom["area"] = missing_buildings_geom[
464
        "geom_building"
465
    ].area
466
467
    return missing_buildings_geom
468
469
470
def generate_mapping_table(
471
    egon_map_zensus_buildings_residential_synth,
472
    egon_hh_profile_in_zensus_cell,
473
):
474
    """
475
    Generate a mapping table for hh profiles to buildings.
476
477
    All hh demand profiles are randomly assigned to buildings within the same
478
    cencus cell.
479
480
    * profiles > buildings: buildings can have multiple profiles but every
481
        building gets at least one profile
482
    * profiles < buildings: not every building gets a profile
483
484
485
    Parameters
486
    ----------
487
    egon_map_zensus_buildings_residential_synth: pd.DataFrame
488
        Table with OSM and synthetic buildings ids per census cell
489
    egon_hh_profile_in_zensus_cell: pd.DataFrame
490
        Table mapping hh demand profiles to census cells
491
492
    Returns
493
    -------
494
    pd.DataFrame
495
        Table with mapping of profile ids to buildings with OSM ids
496
497
    """
498
499
    def create_pool(buildings, profiles):
500
        if profiles > buildings:
501
            surplus = profiles - buildings
502
            surplus = rng.integers(0, buildings, surplus)
503
            pool = list(range(buildings)) + list(surplus)
504
        else:
505
            pool = list(range(buildings))
506
        result = random.sample(population=pool, k=profiles)
507
508
        return result
509
510
    # group oms_ids by census cells and aggregate to list
511
    osm_ids_per_cell = (
512
        egon_map_zensus_buildings_residential_synth[["id", "cell_id"]]
513
        .groupby("cell_id")
514
        .agg(list)
515
    )
516
517
    # cell ids of cells with osm ids
518
    cells_with_buildings = osm_ids_per_cell.index.astype(int).values
519
    # cell ids of cells with profiles
520
    cells_with_profiles = (
521
        egon_hh_profile_in_zensus_cell["cell_id"].astype(int).values
522
    )
523
    # cell ids of cells with osm ids and profiles
524
    cell_with_profiles_and_buildings = np.intersect1d(
525
        cells_with_profiles, cells_with_buildings
526
    )
527
528
    # cells with only buildings might not be residential etc.
529
530
    # reduced list of profile_ids per cell with both buildings and profiles
531
    profile_ids_per_cell_reduced = egon_hh_profile_in_zensus_cell.set_index(
532
        "cell_id"
533
    ).loc[cell_with_profiles_and_buildings, "cell_profile_ids"]
534
    # reduced list of osm_ids per cell with both buildings and profiles
535
    osm_ids_per_cell_reduced = osm_ids_per_cell.loc[
536
        cell_with_profiles_and_buildings, "id"
537
    ].rename("building_ids")
538
539
    # concat both lists by same cell_id
540
    mapping_profiles_to_buildings_reduced = pd.concat(
541
        [profile_ids_per_cell_reduced, osm_ids_per_cell_reduced], axis=1
542
    )
543
544
    # count number of profiles and buildings for each cell
545
    # tells how many profiles have to be assigned to how many buildings
546
    number_profiles_and_buildings_reduced = (
547
        mapping_profiles_to_buildings_reduced.applymap(len)
548
    )
549
550
    # map profiles randomly per cell
551
    # if profiles > buildings, every building will get at least one profile
552
    rng = np.random.default_rng(RANDOM_SEED)
553
    random.seed(RANDOM_SEED)
554
    mapping_profiles_to_buildings = pd.Series(
555
        [
556
            create_pool(buildings, profiles)
557
            for buildings, profiles in zip(
558
                number_profiles_and_buildings_reduced["building_ids"].values,
559
                number_profiles_and_buildings_reduced[
560
                    "cell_profile_ids"
561
                ].values,
562
            )
563
        ],
564
        index=number_profiles_and_buildings_reduced.index,
565
    )
566
567
    # unnest building assignement per cell
568
    mapping_profiles_to_buildings = (
569
        mapping_profiles_to_buildings.rename("building")
570
        .explode()
571
        .reset_index()
572
    )
573
    # add profile position as attribute by number of entries per cell (*)
574
    mapping_profiles_to_buildings["profile"] = (
575
        mapping_profiles_to_buildings.groupby(["cell_id"]).cumcount()
576
    )
577
    # get multiindex of profiles in cells (*)
578
    index_profiles = mapping_profiles_to_buildings.set_index(
579
        ["cell_id", "profile"]
580
    ).index
581
    # get multiindex of buildings in cells (*)
582
    index_buildings = mapping_profiles_to_buildings.set_index(
583
        ["cell_id", "building"]
584
    ).index
585
586
    # get list of profiles by cell and profile position
587
    profile_ids_per_cell_reduced = (
588
        profile_ids_per_cell_reduced.explode().reset_index()
589
    )
590
    # assign profile position by order of list
591
    profile_ids_per_cell_reduced["profile"] = (
592
        profile_ids_per_cell_reduced.groupby(["cell_id"]).cumcount()
593
    )
594
    profile_ids_per_cell_reduced = profile_ids_per_cell_reduced.set_index(
595
        ["cell_id", "profile"]
596
    )
597
598
    # get list of building by cell and building number
599
    osm_ids_per_cell_reduced = osm_ids_per_cell_reduced.explode().reset_index()
600
    # assign building number by order of list
601
    osm_ids_per_cell_reduced["building"] = osm_ids_per_cell_reduced.groupby(
602
        ["cell_id"]
603
    ).cumcount()
604
    osm_ids_per_cell_reduced = osm_ids_per_cell_reduced.set_index(
605
        ["cell_id", "building"]
606
    )
607
608
    # map profiles and buildings by profile position and building number
609
    # merge is possible as both index results from the same origin (*) and are
610
    # not rearranged, therefore in the same order
611
    mapping_profiles_to_buildings = pd.merge(
612
        osm_ids_per_cell_reduced.loc[index_buildings].reset_index(drop=False),
613
        profile_ids_per_cell_reduced.loc[index_profiles].reset_index(
614
            drop=True
615
        ),
616
        left_index=True,
617
        right_index=True,
618
    )
619
620
    # rename columns
621
    mapping_profiles_to_buildings.rename(
622
        columns={
623
            "building_ids": "building_id",
624
            "cell_profile_ids": "profile_id",
625
        },
626
        inplace=True,
627
    )
628
629
    return mapping_profiles_to_buildings
630
631
632
def reduce_synthetic_buildings(
633
    mapping_profiles_to_buildings, synthetic_buildings
634
):
635
    """Reduced list of synthetic buildings to amount actually used.
636
637
    Not all are used, due to randomised assignment with replacing
638
    Id's are adapted to continuous number sequence following
639
    openstreetmap.osm_buildings"""
640
641
    buildings = Table("osm_buildings", Base.metadata, schema="openstreetmap")
642
    # get table metadata from db by name and schema
643
    inspect(engine).reflecttable(buildings, None)
644
645
    # total number of buildings
646
    with db.session_scope() as session:
647
        buildings = session.execute(func.max(buildings.c.id)).scalar()
648
649
    synth_ids_used = mapping_profiles_to_buildings.loc[
650
        mapping_profiles_to_buildings["building_id"] > buildings,
651
        "building_id",
652
    ].unique()
653
654
    synthetic_buildings = synthetic_buildings.loc[
655
        synthetic_buildings["id"].isin(synth_ids_used)
656
    ]
657
    # id_mapping = dict(
658
    #     list(
659
    #         zip(
660
    #             synth_ids_used,
661
    #             range(
662
    #                 buildings,
663
    #                 buildings
664
    #                 + len(synth_ids_used) + 1
665
    #             )
666
    #         )
667
    #     )
668
    # )
669
670
    # time expensive because of regex
671
    # mapping_profiles_to_buildings['building_id'] = (
672
    #     mapping_profiles_to_buildings['building_id'].replace(id_mapping)
673
    # )
674
    return synthetic_buildings
675
676
677
def get_building_peak_loads():
678
    """
679
    Peak loads of buildings are determined.
680
681
    Timeseries for every building are accumulated, the maximum value
682
    determined and with the respective nuts3 factor scaled for 2035 and 2050
683
    scenario.
684
685
    Note
686
    ----------
687
    In test-mode 'SH' the iteration takes place by 'cell_id' to avoid
688
    intensive RAM usage. For whole Germany 'nuts3' are taken and
689
    RAM > 32GB is necessary.
690
    """
691
692
    with db.session_scope() as session:
693
        cells_query = (
694
            session.query(
695
                HouseholdElectricityProfilesOfBuildings,
696
                HouseholdElectricityProfilesInCensusCells.nuts3,
697
                HouseholdElectricityProfilesInCensusCells.factor_2019,
698
                HouseholdElectricityProfilesInCensusCells.factor_2023,
699
                HouseholdElectricityProfilesInCensusCells.factor_2035,
700
                HouseholdElectricityProfilesInCensusCells.factor_2050,
701
            )
702
            .filter(
703
                HouseholdElectricityProfilesOfBuildings.cell_id
704
                == HouseholdElectricityProfilesInCensusCells.cell_id
705
            )
706
            .order_by(HouseholdElectricityProfilesOfBuildings.id)
707
        )
708
709
        df_buildings_and_profiles = pd.read_sql(
710
            cells_query.statement, cells_query.session.bind, index_col="id"
711
        )
712
713
        # fill columns with None with np.nan to allow multiplication with emtpy columns
714
        df_buildings_and_profiles = df_buildings_and_profiles.fillna(np.nan)
715
716
        # Read demand profiles from egon-data-bundle
717
        df_profiles = get_iee_hh_demand_profiles_raw()
718
719
        def ve(s):
720
            raise (ValueError(s))
721
722
        dataset = egon.data.config.settings()["egon-data"][
723
            "--dataset-boundary"
724
        ]
725
        iterate_over = (
726
            "nuts3"
727
            if dataset == "Everything"
728
            else (
729
                "cell_id"
730
                if dataset == "Schleswig-Holstein"
731
                else ve(f"'{dataset}' is not a valid dataset boundary.")
732
            )
733
        )
734
735
        df_building_peak_loads = pd.DataFrame()
736
737
        for nuts3, df in df_buildings_and_profiles.groupby(by=iterate_over):
738
            df_building_peak_load_nuts3 = df_profiles.loc[:, df.profile_id]
739
740
            m_index = pd.MultiIndex.from_arrays(
741
                [df.profile_id, df.building_id],
742
                names=("profile_id", "building_id"),
743
            )
744
            df_building_peak_load_nuts3.columns = m_index
745
            df_building_peak_load_nuts3 = (
746
                df_building_peak_load_nuts3.groupby("building_id", axis=1)
747
                .sum()
748
                .max()
749
            )
750
751
            df_building_peak_load_nuts3 = pd.DataFrame(
752
                [
753
                    df_building_peak_load_nuts3 * df["factor_2019"].unique(),
754
                    df_building_peak_load_nuts3 * df["factor_2023"].unique(),
755
                    df_building_peak_load_nuts3 * df["factor_2035"].unique(),
756
                    df_building_peak_load_nuts3 * df["factor_2050"].unique(),
757
                ],
758
                index=[
759
                    "status2019",
760
                    "status2023",
761
                    "eGon2035",
762
                    "eGon100RE",
763
                ],
764
            ).T
765
766
            df_building_peak_loads = pd.concat(
767
                [df_building_peak_loads, df_building_peak_load_nuts3], axis=0
768
            )
769
770
        df_building_peak_loads.reset_index(inplace=True)
771
        df_building_peak_loads["sector"] = "residential"
772
773
        BuildingElectricityPeakLoads.__table__.drop(
774
            bind=engine, checkfirst=True
775
        )
776
        BuildingElectricityPeakLoads.__table__.create(
777
            bind=engine, checkfirst=True
778
        )
779
780
        df_building_peak_loads = df_building_peak_loads.melt(
781
            id_vars=["building_id", "sector"],
782
            var_name="scenario",
783
            value_name="peak_load_in_w",
784
        )
785
786
        # Write peak loads into db
787
        with db.session_scope() as session:
788
            session.bulk_insert_mappings(
789
                BuildingElectricityPeakLoads,
790
                df_building_peak_loads.to_dict(orient="records"),
791
            )
792
793
794
def map_houseprofiles_to_buildings():
795
    """
796
    Census hh demand profiles are assigned to residential buildings via osm ids.
797
    If no OSM ids are available, synthetic buildings are generated. A list of the
798
    generated buildings and supplementary data as well as the mapping table is stored
799
    in the db.
800
801
    **Tables**
802
803
    synthetic_buildings:
804
        schema: openstreetmap
805
        tablename: osm_buildings_synthetic
806
807
    mapping_profiles_to_buildings:
808
        schema: demand
809
        tablename: egon_household_electricity_profile_of_buildings
810
811
    """
812
    # ========== Get census cells ==========
813
    egon_census_cells = Table(
814
        "egon_destatis_zensus_apartment_building_population_per_ha",
815
        Base.metadata,
816
        schema="society",
817
    )
818
    inspect(engine).reflecttable(egon_census_cells, None)
819
820
    with db.session_scope() as session:
821
        cells_query = session.query(
822
            egon_census_cells.c.zensus_population_id,
823
            egon_census_cells.c.population,
824
            egon_census_cells.c.geom,
825
        ).order_by(egon_census_cells.c.zensus_population_id)
826
        gdf_egon_census_cells = gpd.read_postgis(
827
            cells_query.statement, cells_query.session.bind, geom_col="geom"
828
        )
829
830
    # ========== Get residential buildings ==========
831
    egon_osm_buildings_residential = Table(
832
        "osm_buildings_residential",
833
        Base.metadata,
834
        schema="openstreetmap",
835
    )
836
    inspect(engine).reflecttable(egon_osm_buildings_residential, None)
837
838
    with db.session_scope() as session:
839
        cells_query = session.query(
840
            egon_osm_buildings_residential.c.id.label("building_id"),
841
            egon_osm_buildings_residential.c.geom_building,
842
        ).order_by(egon_osm_buildings_residential.c.id)
843
        gdf_egon_osm_buildings = gpd.read_postgis(
844
            cells_query.statement,
845
            cells_query.session.bind,
846
            geom_col="geom_building",
847
        )
848
849
    # ========== Clip buildings centroids with census cells to get main buildings ==========
850
851
    # Copy buildings and set centroid as geom
852
    gdf_egon_osm_buildings_main = gdf_egon_osm_buildings.copy()
853
    gdf_egon_osm_buildings_main["geom_point"] = gdf_egon_osm_buildings_main.centroid
854
    gdf_egon_osm_buildings_main = gdf_egon_osm_buildings_main.drop(
855
        columns=["geom_building"]).set_geometry("geom_point")
856
857
    egon_map_zensus_buildings_residential_main = gpd.sjoin(
858
        gdf_egon_osm_buildings_main,
859
        gdf_egon_census_cells,
860
        how="inner",
861
        predicate="within"
862
    )[["building_id", "zensus_population_id"]].rename(columns={"zensus_population_id": "cell_id"})
863
864
    # ========== Clip buildings with census cells to get building parts ==========
865
866
    # Clip to create new build parts as buildings
867
    gdf_egon_osm_buildings_census_cells = gdf_egon_census_cells.overlay(
868
        gdf_egon_osm_buildings, how="intersection"
869
    )
870
871
    # Remove main buildings which are not located in populated census cells
872
    buildings_centroid_not_in_census_cells = gdf_egon_osm_buildings_census_cells.loc[
873
        ~gdf_egon_osm_buildings_census_cells.building_id.isin(
874
            egon_map_zensus_buildings_residential_main.building_id)]
875
    gdf_egon_osm_buildings_census_cells = gdf_egon_osm_buildings_census_cells.loc[
876
        ~gdf_egon_osm_buildings_census_cells.building_id.isin(
877
            buildings_centroid_not_in_census_cells.building_id.to_list())
878
    ]
879
880
    gdf_egon_osm_buildings_census_cells["geom_point"] = (
881
        gdf_egon_osm_buildings_census_cells.centroid
882
    )
883
884
    # Add column with unique building ids using suffixes (building parts split by clipping)
885
    gdf_egon_osm_buildings_census_cells["building_id_temp"] = (
886
        gdf_egon_osm_buildings_census_cells["building_id"].astype(str)
887
    )
888
    g = (
889
        gdf_egon_osm_buildings_census_cells.groupby("building_id_temp")
890
        .cumcount()
891
        .add(1)
892
        .astype(str)
893
    )
894
    gdf_egon_osm_buildings_census_cells["building_id_temp"] += "_" + g
895
896
    # Check
897
    try:
898
        assert len(
899
            gdf_egon_osm_buildings_census_cells.building_id_temp.unique()
900
        ) == len(gdf_egon_osm_buildings_census_cells)
901
    except AssertionError:
902
        print(
903
            "The length of split buildings do not match with original count."
904
        )
905
906
    egon_map_zensus_buildings_residential = (
907
        gdf_egon_osm_buildings_census_cells[
908
            ["zensus_population_id", "building_id_temp"]
909
        ].rename(
910
            columns={
911
                "zensus_population_id": "cell_id",
912
                "building_id_temp": "id",
913
            }
914
        )
915
    )
916
917
    # Get household profile to census cells allocations
918
    with db.session_scope() as session:
919
        cells_query = session.query(HouseholdElectricityProfilesInCensusCells)
920
    egon_hh_profile_in_zensus_cell = pd.read_sql(
921
        cells_query.statement, cells_query.session.bind, index_col=None
922
    )
923
924
    # Match OSM and zensus data to define missing buildings
925
    missing_buildings = match_osm_and_zensus_data(
926
        egon_hh_profile_in_zensus_cell,
927
        egon_map_zensus_buildings_residential,
928
    )
929
930
    # randomly generate synthetic buildings in cell without any
931
    synthetic_buildings = generate_synthetic_buildings(
932
        missing_buildings, edge_length=5
933
    )
934
935
    # add synthetic buildings to df
936
    egon_map_zensus_buildings_residential_synth = pd.concat(
937
        [
938
            egon_map_zensus_buildings_residential,
939
            synthetic_buildings[["id", "cell_id"]],
940
        ],
941
        ignore_index=True,
942
    )
943
944
    # assign profiles to buildings
945
    mapping_profiles_to_buildings = generate_mapping_table(
946
        egon_map_zensus_buildings_residential_synth,
947
        egon_hh_profile_in_zensus_cell,
948
    )
949
950
    # remove suffixes from buildings split into parts before to merge them back together
951
    mapping_profiles_to_buildings["building_id"] = (
952
        mapping_profiles_to_buildings.building_id.astype(str).apply(
953
            lambda s: s.split("_")[0] if "_" in s else s
954
        )
955
    )
956
    mapping_profiles_to_buildings["building_id"] = (
957
        mapping_profiles_to_buildings["building_id"].astype(int)
958
    )
959
960
    # reduce list to only used synthetic buildings
961
    synthetic_buildings = reduce_synthetic_buildings(
962
        mapping_profiles_to_buildings, synthetic_buildings
963
    )
964
    synthetic_buildings["n_amenities_inside"] = 0
965
966
    # ========== Reallocate profiles from building part to main building (correct cell_id) ==========
967
    # cf. https://github.com/openego/eGon-data/issues/1190
968
969
    # Get and allocate main building_id
970
    egon_map_zensus_buildings_residential_main = pd.merge(
971
        mapping_profiles_to_buildings[["cell_id", "building_id"]],
972
        egon_map_zensus_buildings_residential_main,
973
        on='building_id',
974
        how='left',
975
        suffixes=('_df1', '_df2')
976
    ).dropna()
977
    egon_map_zensus_buildings_residential_main[
978
        "cell_id_df2"] = egon_map_zensus_buildings_residential_main["cell_id_df2"].astype(int)
979
    mapping_profiles_to_buildings2 = mapping_profiles_to_buildings.copy()
980
    mapping_profiles_to_buildings["cell_id"] = egon_map_zensus_buildings_residential_main["cell_id_df2"]
981
982
    # Retain original values where no main building has been found
983
    # (centroid of building part not in a cell)
984
    mapping_profiles_to_buildings["cell_id"].fillna(mapping_profiles_to_buildings2["cell_id"], inplace=True)
985
    mapping_profiles_to_buildings["cell_id"] = mapping_profiles_to_buildings["cell_id"].astype(int)
986
987
    # ========== Write results to DB ==========
988
989
    OsmBuildingsSynthetic.__table__.drop(bind=engine, checkfirst=True)
990
    OsmBuildingsSynthetic.__table__.create(bind=engine, checkfirst=True)
991
992
    # Write new buildings incl coord into db
993
    n_amenities_inside_type = OsmBuildingsSynthetic.n_amenities_inside.type
994
    synthetic_buildings.to_postgis(
995
        "osm_buildings_synthetic",
996
        con=engine,
997
        if_exists="append",
998
        schema="openstreetmap",
999
        dtype={
1000
            "id": OsmBuildingsSynthetic.id.type,
1001
            "cell_id": OsmBuildingsSynthetic.cell_id.type,
1002
            "geom_building": OsmBuildingsSynthetic.geom_building.type,
1003
            "geom_point": OsmBuildingsSynthetic.geom_point.type,
1004
            "n_amenities_inside": n_amenities_inside_type,
1005
            "building": OsmBuildingsSynthetic.building.type,
1006
            "area": OsmBuildingsSynthetic.area.type,
1007
        },
1008
    )
1009
1010
    HouseholdElectricityProfilesOfBuildings.__table__.drop(
1011
        bind=engine, checkfirst=True
1012
    )
1013
    HouseholdElectricityProfilesOfBuildings.__table__.create(
1014
        bind=engine, checkfirst=True
1015
    )
1016
1017
    # Write building mapping into db
1018
    with db.session_scope() as session:
1019
        session.bulk_insert_mappings(
1020
            HouseholdElectricityProfilesOfBuildings,
1021
            mapping_profiles_to_buildings.to_dict(orient="records"),
1022
        )
1023
1024
1025
def create_buildings_profiles_stats():
1026
    """
1027
    Create DB table `demand.egon_household_electricity_profile_of_buildings_stats`
1028
    with household profile type counts per building
1029
    """
1030
1031
    # Drop and recreate table if existing
1032
    HouseholdElectricityProfilesOfBuildingsStats.__table__.drop(
1033
        bind=engine, checkfirst=True
1034
    )
1035
    HouseholdElectricityProfilesOfBuildingsStats.__table__.create(
1036
        bind=engine, checkfirst=True
1037
    )
1038
1039
    # Query final profile table
1040
    with db.session_scope() as session:
1041
        cells_query = session.query(
1042
            HouseholdElectricityProfilesOfBuildings,
1043
        ).order_by(HouseholdElectricityProfilesOfBuildings.id)
1044
1045
        df_buildings_and_profiles = pd.read_sql(
1046
            cells_query.statement, cells_query.session.bind, index_col="id"
1047
        )
1048
1049
    # Extract household type prefix
1050
    df_buildings_and_profiles = df_buildings_and_profiles.assign(
1051
        household_type=df_buildings_and_profiles.profile_id.str[:2]
1052
    )
1053
1054
    # Unstack and create total
1055
    df_buildings_and_profiles = (
1056
        df_buildings_and_profiles.groupby("building_id")
1057
        .value_counts(["household_type"])
1058
        .unstack(fill_value=0)
1059
    )
1060
    df_buildings_and_profiles["households_total"] = (
1061
        df_buildings_and_profiles.sum(axis=1)
1062
    )
1063
1064
    # Write to DB
1065
    df_buildings_and_profiles.to_sql(
1066
        name=HouseholdElectricityProfilesOfBuildingsStats.__table__.name,
1067
        schema=HouseholdElectricityProfilesOfBuildingsStats.__table__.schema,
1068
        con=engine,
1069
        if_exists="append",
1070
    )
1071
1072
1073
class setup(Dataset):
1074
    """
1075
    Household electricity demand profiles for scenarios in 2035 and 2050
1076
    assigned to buildings.
1077
1078
    Assignment of household electricity demand timeseries to OSM buildings
1079
    and generation of randomly placed synthetic 5x5m buildings if no
1080
    sufficient OSM-data available in the respective census cell.
1081
1082
    For more information see data documentation on :ref:`electricity-demand-ref`.
1083
1084
    *Dependencies*
1085
      * :py:func:`houseprofiles_in_census_cells
1086
        <egon.data.datasets.electricity_demand_timeseries.hh_profiles.houseprofiles_in_census_cells>`
1087
1088
    *Resulting tables*
1089
      * :py:class:`OsmBuildingsSynthetic
1090
        <egon.data.datasets.electricity_demand_timeseries.hh_buildings.OsmBuildingsSynthetic>`
1091
        is created and filled
1092
      * :py:class:`HouseholdElectricityProfilesOfBuildings
1093
        <egon.data.datasets.electricity_demand_timeseries.hh_buildings.HouseholdElectricityProfilesOfBuildings>`
1094
        is created and filled
1095
      * :py:class:`BuildingElectricityPeakLoads
1096
        <egon.data.datasets.electricity_demand_timeseries.hh_buildings.BuildingElectricityPeakLoads>`
1097
        is created and filled
1098
1099
    **The following datasets from the database are used for creation:**
1100
1101
    * `demand.household_electricity_profiles_in_census_cells`:
1102
        Lists references and scaling parameters to time series data for each
1103
        household in a cell by identifiers. This table is fundamental for
1104
        creating subsequent data like demand profiles on MV grid level or
1105
        for determining the peak load at load. Only the profile reference
1106
        and the cell identifiers are used.
1107
1108
    * `society.egon_destatis_zensus_apartment_building_population_per_ha`:
1109
        Lists number of apartments, buildings and population for each census
1110
        cell.
1111
1112
    * `boundaries.egon_map_zensus_buildings_residential`:
1113
        List of OSM tagged buildings which are considered to be residential.
1114
1115
1116
    **What is the goal?**
1117
1118
    To assign every household demand profile allocated each census cell to a
1119
    specific building.
1120
1121
    **What is the challenge?**
1122
1123
    The census and the OSM dataset differ from each other. The census uses
1124
    statistical methods and therefore lacks accuracy at high spatial
1125
    resolution. The OSM dataset is a community based dataset which is
1126
    extended throughout and does not claim to be complete. By merging these
1127
    datasets inconsistencies need to be addressed. For example: not yet
1128
    tagged buildings in OSM or new building areas not considered in census
1129
    2011.
1130
1131
    **How are these datasets combined?**
1132
1133
    The assignment of household demand timeseries to buildings takes place
1134
    at cell level. Within each cell a pool of profiles exists, produced by
1135
    the 'HH Demand" module. These profiles are randomly assigned to a
1136
    filtered list of OSM buildings within this cell. Every profile is
1137
    assigned to a building and every building get a profile assigned if
1138
    there is enough households by the census data. If there are more
1139
    profiles than buildings, all additional profiles are randomly assigned.
1140
    Therefore, multiple profiles can be assigned to one building, making it a
1141
    multi-household building. If there are no OSM buildings available,
1142
    synthetic ones are created (see below).
1143
1144
    **What are central assumptions during the data processing?**
1145
1146
    * Mapping zensus data to OSM data is not trivial.
1147
      Discrepancies are substituted.
1148
    * Missing OSM buildings are generated by census building count.
1149
    * If no census building count data is available, the number of buildings
1150
      is derived by an average rate of households/buildings applied to the
1151
      number of households.
1152
1153
    **Drawbacks and limitations of the data**
1154
1155
    * Missing OSM buildings in cells without census building count are
1156
      derived by an average (median) rate of households/buildings applied
1157
      to the number of households. We use a twofold approach for the rate:
1158
      first, the rate is calculated using adjacent cells (function
1159
      `find_adjacent_cells()`), a distance of 3 cells in each direction is
1160
      used by default (resulting in a 7x7 lookup matrix). For the remaining
1161
      cells, i.e. cells without any rate in the adjacent cells, the global
1162
      median rate is used.
1163
1164
      As only whole houses can exist, the substitute is ceiled to the next
1165
      higher integer. Ceiling is applied to avoid rounding to amount of 0
1166
      buildings.
1167
1168
    * As this dataset uses the load profile assignment at census cell level
1169
      conducted in hh_profiles.py, also check drawbacks and limitations in that module.
1170
1171
    **Example Query**
1172
1173
    * Get a list with number of houses, households and household types per
1174
      census cell
1175
1176
    .. code-block:: SQL
1177
1178
        SELECT t1.cell_id, building_count, hh_count, hh_types FROM (
1179
            SELECT
1180
                cell_id,
1181
                COUNT(DISTINCT(building_id)) AS building_count,
1182
                COUNT(profile_id) AS hh_count
1183
            FROM demand.egon_household_electricity_profile_of_buildings
1184
            GROUP BY cell_id
1185
        ) AS t1
1186
        FULL OUTER JOIN (
1187
            SELECT
1188
                cell_id,
1189
                array_agg(
1190
                    array[CAST(hh_10types AS char), hh_type]
1191
                ) AS hh_types
1192
            FROM society.egon_destatis_zensus_household_per_ha_refined
1193
            GROUP BY cell_id
1194
        ) AS t2
1195
        ON t1.cell_id = t2.cell_id
1196
1197
    """
1198
1199
    #:
1200
    name: str = "Demand_Building_Assignment"
1201
    #:
1202
    version: str = "0.0.7"
1203
    #:
1204
    tasks = (
1205
        map_houseprofiles_to_buildings,
1206
        create_buildings_profiles_stats,
1207
        get_building_peak_loads,
1208
    )
1209
1210
    def __init__(self, dependencies):
1211
        super().__init__(
1212
            name=self.name,
1213
            version=self.version,
1214
            dependencies=dependencies,
1215
            tasks=self.tasks,
1216
        )
1217