Passed
Pull Request — dev (#1375)
by
unknown
05:22
created

data.datasets.electricity_demand_timeseries.hh_buildings   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 1313
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 27
eloc 624
dl 0
loc 1313
rs 9.976
c 0
b 0
f 0

7 Functions

Rating   Name   Duplication   Size   Complexity  
C map_houseprofiles_to_buildings() 0 247 8
B generate_synthetic_buildings() 0 109 4
B generate_mapping_table() 0 160 2
B get_building_peak_loads() 0 114 6
B match_osm_and_zensus_data() 0 225 2
A reduce_synthetic_buildings() 0 43 2
A create_buildings_profiles_stats() 0 45 2

1 Method

Rating   Name   Duplication   Size   Complexity  
B setup.__init__() 0 78 1
1
"""
2
Household electricity demand time series for scenarios in 2035 and 2050
3
assigned to OSM-buildings.
4
5
"""
6
7
import random
8
9
from geoalchemy2 import Geometry
10
from sqlalchemy import REAL, Column, Integer, String, Table, func, inspect
11
from sqlalchemy.ext.declarative import declarative_base
12
import geopandas as gpd
13
import numpy as np
14
import pandas as pd
15
16
from egon.data import db
17
from egon.data.datasets import Dataset
18
from egon.data.datasets.electricity_demand_timeseries.hh_profiles import (
19
    HouseholdElectricityProfilesInCensusCells,
20
    get_iee_hh_demand_profiles_raw,
21
)
22
from egon.data.datasets.electricity_demand_timeseries.tools import (
23
    random_point_in_square,
24
)
25
import egon.data.config
26
from egon_validation import (
27
    RowCountValidation,
28
    DataTypeValidation,
29
    WholeTableNotNullAndNotNaNValidation,
30
    ValueSetValidation
31
)
32
33
engine = db.engine()
34
Base = declarative_base()
35
36
data_config = egon.data.config.datasets()
37
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
38
np.random.seed(RANDOM_SEED)
39
40
41
class HouseholdElectricityProfilesOfBuildings(Base):
42
    """
43
    Class definition of table demand.egon_household_electricity_profile_of_buildings.
44
45
    Mapping of demand timeseries and buildings and cell_id. This table is created within
46
    :py:func:`hh_buildings.map_houseprofiles_to_buildings()`.
47
48
    """
49
50
    __tablename__ = "egon_household_electricity_profile_of_buildings"
51
    __table_args__ = {"schema": "demand"}
52
53
    id = Column(Integer, primary_key=True)
54
    building_id = Column(Integer, index=True)
55
    cell_id = Column(Integer, index=True)
56
    profile_id = Column(String, index=True)
57
58
59
class HouseholdElectricityProfilesOfBuildingsStats(Base):
60
    """
61
    Class definition of table `demand.egon_household_electricity_profile_of_buildings_stats`.
62
    Contains number of households per building and type from table
63
    `demand.egon_household_electricity_profile_of_buildings`
64
65
    Columns
66
    -------
67
    building_id: Building id as used in tables `openstreetmap.osm_buildings_*`, index col
68
    households_total: total count of households
69
    SR: count of household type SR single retiree
70
    SO: count of household type SA single adults
71
    PR: count of household type PR pair retiree
72
    PO: count of household type PA pair adults
73
    SK: count of household type SK single n children
74
    P1: count of household type P1 pair 1 child
75
    P2: count of household type P2 pair 2 children
76
    P3: count of household type P3 pair 3 children
77
    OR: count of household type OR multi retiree n children
78
    OO: count of household type OO multi adults n children
79
    """
80
81
    __tablename__ = "egon_household_electricity_profile_of_buildings_stats"
82
    __table_args__ = {"schema": "demand"}
83
84
    building_id = Column(Integer, primary_key=True)
85
    households_total = Column(Integer, nullable=True)
86
    SR = Column(Integer, nullable=True)
87
    SO = Column(Integer, nullable=True)
88
    PR = Column(Integer, nullable=True)
89
    PO = Column(Integer, nullable=True)
90
    SK = Column(Integer, nullable=True)
91
    P1 = Column(Integer, nullable=True)
92
    P2 = Column(Integer, nullable=True)
93
    P3 = Column(Integer, nullable=True)
94
    OR = Column(Integer, nullable=True)
95
    OO = Column(Integer, nullable=True)
96
97
98
class OsmBuildingsSynthetic(Base):
99
    """
100
    Class definition of table demand.osm_buildings_synthetic.
101
102
    Lists generated synthetic building with id, zensus_population_id and
103
    building type. This table is created within
104
    :py:func:`hh_buildings.map_houseprofiles_to_buildings()`.
105
    """
106
107
    __tablename__ = "osm_buildings_synthetic"
108
    __table_args__ = {"schema": "openstreetmap"}
109
110
    id = Column(String, primary_key=True)
111
    cell_id = Column(String, index=True)
112
    geom_building = Column(Geometry("Polygon", 3035), index=True)
113
    geom_point = Column(Geometry("POINT", 3035))
114
    n_amenities_inside = Column(Integer)
115
    building = Column(String(11))
116
    area = Column(REAL)
117
118
119
class BuildingElectricityPeakLoads(Base):
120
    """
121
    Class definition of table demand.egon_building_electricity_peak_loads.
122
123
    Mapping of electricity demand time series and buildings including cell_id,
124
    building area and peak load. This table is created within
125
    :func:`hh_buildings.get_building_peak_loads()`.
126
    """
127
128
    __tablename__ = "egon_building_electricity_peak_loads"
129
    __table_args__ = {"schema": "demand"}
130
131
    building_id = Column(Integer, primary_key=True)
132
    scenario = Column(String, primary_key=True)
133
    sector = Column(String, primary_key=True)
134
    peak_load_in_w = Column(REAL)
135
    voltage_level = Column(Integer, index=True)
136
137
138
def match_osm_and_zensus_data(
139
    egon_hh_profile_in_zensus_cell,
140
    egon_map_zensus_buildings_residential,
141
):
142
    """
143
    Compares OSM buildings and census hh demand profiles.
144
145
    OSM building data and hh demand profiles based on census data is compared.
146
    Census cells with only profiles but no osm-ids are identified to generate
147
    synthetic buildings. Census building count is used, if available, to define
148
    number of missing buildings. Otherwise, we use a twofold approach for the
149
    rate: first, the rate is calculated using adjacent cells (function
150
    `find_adjacent_cells()`), a distance of 3 cells in each direction is used
151
    by default (resulting in a 7x7 lookup matrix). As fallback, the overall
152
    median profile/building rate is used to derive the number of buildings
153
    from the number of already generated demand profiles.
154
155
    Parameters
156
    ----------
157
    egon_hh_profile_in_zensus_cell: pd.DataFrame
158
        Table mapping hh demand profiles to census cells
159
160
    egon_map_zensus_buildings_residential: pd.DataFrame
161
        Table with buildings osm-id and cell_id
162
163
    Returns
164
    -------
165
    pd.DataFrame
166
        Table with cell_ids and number of missing buildings
167
    """
168
169
    def find_adjacent_cells(row, adj_cell_radius):
170
        """
171
        Find adjacent cells for cell by iterating over census grid ids
172
        (100mN...E...).
173
174
        Parameters
175
        ----------
176
        row : Dataframe row
177
            Dataframe row
178
        adj_cell_radius : int
179
            distance of cells in each direction to find cells,
180
            e.g. adj_cell_radius=3 -> 7x7 cell matrix
181
182
        Returns
183
        -------
184
        tuples of int
185
            N coordinates, E coordinates in format
186
            [(N_cell_1, E_cell_1), ..., (N_cell_n, E_cell_n)]
187
        """
188
        return [
189
            f"100mN{_[0]}E{_[1]}"
190
            for _ in np.array(
191
                np.meshgrid(
192
                    np.arange(
193
                        row.N - adj_cell_radius, row.N + adj_cell_radius + 1
194
                    ),
195
                    np.arange(
196
                        row.E - adj_cell_radius, row.E + adj_cell_radius + 1
197
                    ),
198
                )
199
            ).T.reshape(-1, 2)
200
        ]
201
202
    # count number of profiles for each cell
203
    profiles_per_cell = egon_hh_profile_in_zensus_cell.cell_profile_ids.apply(
204
        len
205
    )
206
207
    # Add number of profiles per cell
208
    number_of_buildings_profiles_per_cell = pd.merge(
209
        left=profiles_per_cell,
210
        right=egon_hh_profile_in_zensus_cell["cell_id"],
211
        left_index=True,
212
        right_index=True,
213
    )
214
215
    # count buildings/ids for each cell
216
    buildings_per_cell = egon_map_zensus_buildings_residential.groupby(
217
        "cell_id"
218
    )["id"].count()
219
    buildings_per_cell = buildings_per_cell.rename("building_ids")
220
221
    # add buildings left join to have all the cells with assigned profiles
222
    number_of_buildings_profiles_per_cell = pd.merge(
223
        left=number_of_buildings_profiles_per_cell,
224
        right=buildings_per_cell,
225
        left_on="cell_id",
226
        right_index=True,
227
        how="left",
228
    )
229
230
    # identify cell ids with profiles but no buildings
231
    number_of_buildings_profiles_per_cell = (
232
        number_of_buildings_profiles_per_cell.fillna(0).astype(int)
233
    )
234
    missing_buildings = number_of_buildings_profiles_per_cell.loc[
235
        number_of_buildings_profiles_per_cell.building_ids == 0,
236
        ["cell_id", "cell_profile_ids"],
237
    ].set_index("cell_id")
238
239
    # query zensus building count
240
    egon_destatis_building_count = Table(
241
        "egon_destatis_zensus_apartment_building_population_per_ha",
242
        Base.metadata,
243
        schema="society",
244
    )
245
    # get table metadata from db by name and schema
246
    inspect(engine).reflecttable(egon_destatis_building_count, None)
247
248
    with db.session_scope() as session:
249
        cells_query = session.query(
250
            egon_destatis_building_count.c.zensus_population_id,
251
            egon_destatis_building_count.c.building_count,
252
        )
253
254
    egon_destatis_building_count = pd.read_sql(
255
        cells_query.statement,
256
        cells_query.session.bind,
257
        index_col="zensus_population_id",
258
    )
259
    egon_destatis_building_count = egon_destatis_building_count.dropna()
260
261
    missing_buildings = pd.merge(
262
        left=missing_buildings,
263
        right=egon_destatis_building_count,
264
        left_index=True,
265
        right_index=True,
266
        how="left",
267
    )
268
269
    # exclude cells without buildings
270
    only_cells_with_buildings = (
271
        number_of_buildings_profiles_per_cell["building_ids"] != 0
272
    )
273
    # get profile/building rate for each cell
274
    profile_building_rate = (
275
        number_of_buildings_profiles_per_cell.loc[
276
            only_cells_with_buildings, "cell_profile_ids"
277
        ]
278
        / number_of_buildings_profiles_per_cell.loc[
279
            only_cells_with_buildings, "building_ids"
280
        ]
281
    )
282
283
    # prepare values for missing building counts by number of profile ids
284
    building_count_fillna = missing_buildings.loc[
285
        missing_buildings["building_count"].isna(), "cell_profile_ids"
286
    ]
287
    # devide by median profile/building rate
288
    building_count_fillna = (
289
        building_count_fillna / profile_building_rate.median()
290
    )
291
    # replace missing building counts
292
    missing_buildings["building_count"] = missing_buildings[
293
        "building_count"
294
    ].fillna(value=building_count_fillna)
295
296
    # ========== START Update profile/building rate in cells w/o bld using adjacent cells ==========
297
    missing_buildings_temp = (
298
        egon_hh_profile_in_zensus_cell[["cell_id", "grid_id"]]
299
        .set_index("cell_id")
300
        .loc[missing_buildings.index.unique()]
301
    )
302
303
    # Extract coordinates
304
    missing_buildings_temp = pd.concat(
305
        [
306
            missing_buildings_temp,
307
            missing_buildings_temp.grid_id.str.extract(r"100mN(\d+)E(\d+)")
308
            .astype(int)
309
            .rename(columns={0: "N", 1: "E"}),
310
        ],
311
        axis=1,
312
    )
313
314
    # Find adjacent cells for cell
315
    missing_buildings_temp["cell_adj"] = missing_buildings_temp.apply(
316
        find_adjacent_cells, adj_cell_radius=3, axis=1
317
    )
318
    missing_buildings_temp = (
319
        missing_buildings_temp.explode("cell_adj")
320
        .drop(columns=["grid_id", "N", "E"])
321
        .reset_index()
322
    )
323
324
    # Create mapping table cell -> adjacent cells
325
    missing_buildings_temp = (
326
        missing_buildings_temp.set_index("cell_adj")
327
        .join(
328
            egon_hh_profile_in_zensus_cell.set_index("grid_id").cell_id,
329
            rsuffix="_adj",
330
        )
331
        .dropna()
332
        .set_index("cell_id_adj")
333
    )
334
335
    # Calculate profile/building rate for those cells
336
    profile_building_rate.name = "profile_building_rate"
337
    missing_buildings_temp = missing_buildings_temp.join(
338
        number_of_buildings_profiles_per_cell[["cell_id"]]
339
        .join(profile_building_rate)
340
        .set_index("cell_id")
341
    )
342
    missing_buildings_temp = (
343
        missing_buildings_temp.groupby("cell_id").median().dropna()
344
    )
345
346
    # Update mising buildings
347
    missing_buildings["building_count"] = (
348
        missing_buildings.cell_profile_ids.div(
349
            missing_buildings_temp.profile_building_rate
350
        ).fillna(missing_buildings.building_count)
351
    )
352
    # ========== END Update profile/building rate in cells w/o bld using adjacent cells ==========
353
354
    # ceil to have at least one building each cell and make type int
355
    missing_buildings = missing_buildings.apply(np.ceil).astype(int)
356
    # generate list of building ids for each cell
357
    missing_buildings["building_count"] = missing_buildings[
358
        "building_count"
359
    ].apply(range)
360
    missing_buildings = missing_buildings.explode(column="building_count")
361
362
    return missing_buildings
363
364
365
def generate_synthetic_buildings(missing_buildings, edge_length):
366
    """
367
    Generate synthetic square buildings in census cells for every entry
368
    in missing_buildings.
369
370
    Generate random placed synthetic buildings incl geom data within the bounds
371
    of the cencus cell. Buildings have each a square area with edge_length^2.
372
373
374
    Parameters
375
    ----------
376
    missing_buildings: pd.Series or pd.DataFrame
377
        Table with cell_ids and building number
378
    edge_length: int
379
        Edge length of square synthetic building in meter
380
381
    Returns
382
    -------
383
    pd.DataFrame
384
        Table with generated synthetic buildings, area, cell_id and geom data
385
386
    """
387
    destatis_zensus_population_per_ha_inside_germany = Table(
388
        "destatis_zensus_population_per_ha_inside_germany",
389
        Base.metadata,
390
        schema="society",
391
    )
392
    # get table metadata from db by name and schema
393
    inspect(engine).reflecttable(
394
        destatis_zensus_population_per_ha_inside_germany, None
395
    )
396
397
    with db.session_scope() as session:
398
        cells_query = session.query(
399
            destatis_zensus_population_per_ha_inside_germany
400
        ).filter(
401
            destatis_zensus_population_per_ha_inside_germany.c.id.in_(
402
                missing_buildings.index.unique()
403
            )
404
        )
405
406
    destatis_zensus_population_per_ha_inside_germany = gpd.read_postgis(
407
        cells_query.statement, cells_query.session.bind, index_col="id"
408
    )
409
410
    # add geom data of zensus cell
411
    missing_buildings_geom = pd.merge(
412
        left=destatis_zensus_population_per_ha_inside_germany[["geom"]],
413
        right=missing_buildings,
414
        left_index=True,
415
        right_index=True,
416
        how="right",
417
    )
418
419
    missing_buildings_geom = missing_buildings_geom.reset_index(drop=False)
420
    missing_buildings_geom = missing_buildings_geom.rename(
421
        columns={
422
            "building_count": "building_id",
423
            "cell_profile_ids": "profiles",
424
            "id": "cell_id",
425
        }
426
    )
427
428
    # create random points within census cells
429
    points = random_point_in_square(
430
        geom=missing_buildings_geom["geom"], tol=edge_length / 2
431
    )
432
433
    # Store center of poylon
434
    missing_buildings_geom["geom_point"] = points
435
    # Create building using a square around point
436
    missing_buildings_geom["geom_building"] = points.buffer(
437
        distance=edge_length / 2, cap_style=3
438
    )
439
    missing_buildings_geom = missing_buildings_geom.drop(columns=["geom"])
440
    missing_buildings_geom = gpd.GeoDataFrame(
441
        missing_buildings_geom, crs="EPSG:3035", geometry="geom_building"
442
    )
443
444
    # get table metadata from db by name and schema
445
    buildings = Table("osm_buildings", Base.metadata, schema="openstreetmap")
446
    inspect(engine).reflecttable(buildings, None)
447
448
    # get max number of building ids from non-filtered building table
449
    with db.session_scope() as session:
450
        buildings = session.execute(func.max(buildings.c.id)).scalar()
451
452
    # apply ids following the sequence of openstreetmap.osm_buildings id
453
    missing_buildings_geom["id"] = range(
454
        buildings + 1,
455
        buildings + len(missing_buildings_geom) + 1,
456
    )
457
458
    drop_columns = [
459
        i
460
        for i in ["building_id", "profiles"]
461
        if i in missing_buildings_geom.columns
462
    ]
463
    if drop_columns:
464
        missing_buildings_geom = missing_buildings_geom.drop(
465
            columns=drop_columns
466
        )
467
468
    missing_buildings_geom["building"] = "residential"
469
    missing_buildings_geom["area"] = missing_buildings_geom[
470
        "geom_building"
471
    ].area
472
473
    return missing_buildings_geom
474
475
476
def generate_mapping_table(
477
    egon_map_zensus_buildings_residential_synth,
478
    egon_hh_profile_in_zensus_cell,
479
):
480
    """
481
    Generate a mapping table for hh profiles to buildings.
482
483
    All hh demand profiles are randomly assigned to buildings within the same
484
    cencus cell.
485
486
    * profiles > buildings: buildings can have multiple profiles but every
487
        building gets at least one profile
488
    * profiles < buildings: not every building gets a profile
489
490
491
    Parameters
492
    ----------
493
    egon_map_zensus_buildings_residential_synth: pd.DataFrame
494
        Table with OSM and synthetic buildings ids per census cell
495
    egon_hh_profile_in_zensus_cell: pd.DataFrame
496
        Table mapping hh demand profiles to census cells
497
498
    Returns
499
    -------
500
    pd.DataFrame
501
        Table with mapping of profile ids to buildings with OSM ids
502
503
    """
504
505
    def create_pool(buildings, profiles):
506
        if profiles > buildings:
507
            surplus = profiles - buildings
508
            surplus = rng.integers(0, buildings, surplus)
509
            pool = list(range(buildings)) + list(surplus)
510
        else:
511
            pool = list(range(buildings))
512
        result = random.sample(population=pool, k=profiles)
513
514
        return result
515
516
    # group oms_ids by census cells and aggregate to list
517
    osm_ids_per_cell = (
518
        egon_map_zensus_buildings_residential_synth[["id", "cell_id"]]
519
        .groupby("cell_id")
520
        .agg(list)
521
    )
522
523
    # cell ids of cells with osm ids
524
    cells_with_buildings = osm_ids_per_cell.index.astype(int).values
525
    # cell ids of cells with profiles
526
    cells_with_profiles = (
527
        egon_hh_profile_in_zensus_cell["cell_id"].astype(int).values
528
    )
529
    # cell ids of cells with osm ids and profiles
530
    cell_with_profiles_and_buildings = np.intersect1d(
531
        cells_with_profiles, cells_with_buildings
532
    )
533
534
    # cells with only buildings might not be residential etc.
535
536
    # reduced list of profile_ids per cell with both buildings and profiles
537
    profile_ids_per_cell_reduced = egon_hh_profile_in_zensus_cell.set_index(
538
        "cell_id"
539
    ).loc[cell_with_profiles_and_buildings, "cell_profile_ids"]
540
    # reduced list of osm_ids per cell with both buildings and profiles
541
    osm_ids_per_cell_reduced = osm_ids_per_cell.loc[
542
        cell_with_profiles_and_buildings, "id"
543
    ].rename("building_ids")
544
545
    # concat both lists by same cell_id
546
    mapping_profiles_to_buildings_reduced = pd.concat(
547
        [profile_ids_per_cell_reduced, osm_ids_per_cell_reduced], axis=1
548
    )
549
550
    # count number of profiles and buildings for each cell
551
    # tells how many profiles have to be assigned to how many buildings
552
    number_profiles_and_buildings_reduced = (
553
        mapping_profiles_to_buildings_reduced.applymap(len)
554
    )
555
556
    # map profiles randomly per cell
557
    # if profiles > buildings, every building will get at least one profile
558
    rng = np.random.default_rng(RANDOM_SEED)
559
    random.seed(RANDOM_SEED)
560
    mapping_profiles_to_buildings = pd.Series(
561
        [
562
            create_pool(buildings, profiles)
563
            for buildings, profiles in zip(
564
                number_profiles_and_buildings_reduced["building_ids"].values,
565
                number_profiles_and_buildings_reduced[
566
                    "cell_profile_ids"
567
                ].values,
568
            )
569
        ],
570
        index=number_profiles_and_buildings_reduced.index,
571
    )
572
573
    # unnest building assignement per cell
574
    mapping_profiles_to_buildings = (
575
        mapping_profiles_to_buildings.rename("building")
576
        .explode()
577
        .reset_index()
578
    )
579
    # add profile position as attribute by number of entries per cell (*)
580
    mapping_profiles_to_buildings["profile"] = (
581
        mapping_profiles_to_buildings.groupby(["cell_id"]).cumcount()
582
    )
583
    # get multiindex of profiles in cells (*)
584
    index_profiles = mapping_profiles_to_buildings.set_index(
585
        ["cell_id", "profile"]
586
    ).index
587
    # get multiindex of buildings in cells (*)
588
    index_buildings = mapping_profiles_to_buildings.set_index(
589
        ["cell_id", "building"]
590
    ).index
591
592
    # get list of profiles by cell and profile position
593
    profile_ids_per_cell_reduced = (
594
        profile_ids_per_cell_reduced.explode().reset_index()
595
    )
596
    # assign profile position by order of list
597
    profile_ids_per_cell_reduced["profile"] = (
598
        profile_ids_per_cell_reduced.groupby(["cell_id"]).cumcount()
599
    )
600
    profile_ids_per_cell_reduced = profile_ids_per_cell_reduced.set_index(
601
        ["cell_id", "profile"]
602
    )
603
604
    # get list of building by cell and building number
605
    osm_ids_per_cell_reduced = osm_ids_per_cell_reduced.explode().reset_index()
606
    # assign building number by order of list
607
    osm_ids_per_cell_reduced["building"] = osm_ids_per_cell_reduced.groupby(
608
        ["cell_id"]
609
    ).cumcount()
610
    osm_ids_per_cell_reduced = osm_ids_per_cell_reduced.set_index(
611
        ["cell_id", "building"]
612
    )
613
614
    # map profiles and buildings by profile position and building number
615
    # merge is possible as both index results from the same origin (*) and are
616
    # not rearranged, therefore in the same order
617
    mapping_profiles_to_buildings = pd.merge(
618
        osm_ids_per_cell_reduced.loc[index_buildings].reset_index(drop=False),
619
        profile_ids_per_cell_reduced.loc[index_profiles].reset_index(
620
            drop=True
621
        ),
622
        left_index=True,
623
        right_index=True,
624
    )
625
626
    # rename columns
627
    mapping_profiles_to_buildings.rename(
628
        columns={
629
            "building_ids": "building_id",
630
            "cell_profile_ids": "profile_id",
631
        },
632
        inplace=True,
633
    )
634
635
    return mapping_profiles_to_buildings
636
637
638
def reduce_synthetic_buildings(
639
    mapping_profiles_to_buildings, synthetic_buildings
640
):
641
    """Reduced list of synthetic buildings to amount actually used.
642
643
    Not all are used, due to randomised assignment with replacing
644
    Id's are adapted to continuous number sequence following
645
    openstreetmap.osm_buildings"""
646
647
    buildings = Table("osm_buildings", Base.metadata, schema="openstreetmap")
648
    # get table metadata from db by name and schema
649
    inspect(engine).reflecttable(buildings, None)
650
651
    # total number of buildings
652
    with db.session_scope() as session:
653
        buildings = session.execute(func.max(buildings.c.id)).scalar()
654
655
    synth_ids_used = mapping_profiles_to_buildings.loc[
656
        mapping_profiles_to_buildings["building_id"] > buildings,
657
        "building_id",
658
    ].unique()
659
660
    synthetic_buildings = synthetic_buildings.loc[
661
        synthetic_buildings["id"].isin(synth_ids_used)
662
    ]
663
    # id_mapping = dict(
664
    #     list(
665
    #         zip(
666
    #             synth_ids_used,
667
    #             range(
668
    #                 buildings,
669
    #                 buildings
670
    #                 + len(synth_ids_used) + 1
671
    #             )
672
    #         )
673
    #     )
674
    # )
675
676
    # time expensive because of regex
677
    # mapping_profiles_to_buildings['building_id'] = (
678
    #     mapping_profiles_to_buildings['building_id'].replace(id_mapping)
679
    # )
680
    return synthetic_buildings
681
682
683
def get_building_peak_loads():
684
    """
685
    Peak loads of buildings are determined.
686
687
    Timeseries for every building are accumulated, the maximum value
688
    determined and with the respective nuts3 factor scaled for 2035 and 2050
689
    scenario.
690
691
    Note
692
    ----------
693
    In test-mode 'SH' the iteration takes place by 'cell_id' to avoid
694
    intensive RAM usage. For whole Germany 'nuts3' are taken and
695
    RAM > 32GB is necessary.
696
    """
697
698
    with db.session_scope() as session:
699
        cells_query = (
700
            session.query(
701
                HouseholdElectricityProfilesOfBuildings,
702
                HouseholdElectricityProfilesInCensusCells.nuts3,
703
                HouseholdElectricityProfilesInCensusCells.factor_2019,
704
                HouseholdElectricityProfilesInCensusCells.factor_2023,
705
                HouseholdElectricityProfilesInCensusCells.factor_2035,
706
                HouseholdElectricityProfilesInCensusCells.factor_2050,
707
            )
708
            .filter(
709
                HouseholdElectricityProfilesOfBuildings.cell_id
710
                == HouseholdElectricityProfilesInCensusCells.cell_id
711
            )
712
            .order_by(HouseholdElectricityProfilesOfBuildings.id)
713
        )
714
715
        df_buildings_and_profiles = pd.read_sql(
716
            cells_query.statement, cells_query.session.bind, index_col="id"
717
        )
718
719
        # fill columns with None with np.nan to allow multiplication with emtpy columns
720
        df_buildings_and_profiles = df_buildings_and_profiles.fillna(np.nan)
721
722
        # Read demand profiles from egon-data-bundle
723
        df_profiles = get_iee_hh_demand_profiles_raw()
724
725
        def ve(s):
726
            raise (ValueError(s))
727
728
        dataset = egon.data.config.settings()["egon-data"][
729
            "--dataset-boundary"
730
        ]
731
        iterate_over = (
732
            "nuts3"
733
            if dataset == "Everything"
734
            else (
735
                "cell_id"
736
                if dataset == "Schleswig-Holstein"
737
                else ve(f"'{dataset}' is not a valid dataset boundary.")
738
            )
739
        )
740
741
        df_building_peak_loads = pd.DataFrame()
742
743
        for nuts3, df in df_buildings_and_profiles.groupby(by=iterate_over):
744
            df_building_peak_load_nuts3 = df_profiles.loc[:, df.profile_id]
745
746
            m_index = pd.MultiIndex.from_arrays(
747
                [df.profile_id, df.building_id],
748
                names=("profile_id", "building_id"),
749
            )
750
            df_building_peak_load_nuts3.columns = m_index
751
            df_building_peak_load_nuts3 = (
752
                df_building_peak_load_nuts3.groupby("building_id", axis=1)
753
                .sum()
754
                .max()
755
            )
756
757
            df_building_peak_load_nuts3 = pd.DataFrame(
758
                [
759
                    df_building_peak_load_nuts3 * df["factor_2019"].unique(),
760
                    df_building_peak_load_nuts3 * df["factor_2023"].unique(),
761
                    df_building_peak_load_nuts3 * df["factor_2035"].unique(),
762
                    df_building_peak_load_nuts3 * df["factor_2050"].unique(),
763
                ],
764
                index=[
765
                    "status2019",
766
                    "status2023",
767
                    "eGon2035",
768
                    "eGon100RE",
769
                ],
770
            ).T
771
772
            df_building_peak_loads = pd.concat(
773
                [df_building_peak_loads, df_building_peak_load_nuts3], axis=0
774
            )
775
776
        df_building_peak_loads.reset_index(inplace=True)
777
        df_building_peak_loads["sector"] = "residential"
778
779
        BuildingElectricityPeakLoads.__table__.drop(
780
            bind=engine, checkfirst=True
781
        )
782
        BuildingElectricityPeakLoads.__table__.create(
783
            bind=engine, checkfirst=True
784
        )
785
786
        df_building_peak_loads = df_building_peak_loads.melt(
787
            id_vars=["building_id", "sector"],
788
            var_name="scenario",
789
            value_name="peak_load_in_w",
790
        )
791
792
        # Write peak loads into db
793
        with db.session_scope() as session:
794
            session.bulk_insert_mappings(
795
                BuildingElectricityPeakLoads,
796
                df_building_peak_loads.to_dict(orient="records"),
797
            )
798
799
800
def map_houseprofiles_to_buildings():
801
    """
802
    Census hh demand profiles are assigned to residential buildings via osm ids.
803
    If no OSM ids are available, synthetic buildings are generated. A list of the
804
    generated buildings and supplementary data as well as the mapping table is stored
805
    in the db.
806
807
    **Tables**
808
809
    synthetic_buildings:
810
        schema: openstreetmap
811
        tablename: osm_buildings_synthetic
812
813
    mapping_profiles_to_buildings:
814
        schema: demand
815
        tablename: egon_household_electricity_profile_of_buildings
816
817
    """
818
    # ========== Get census cells ==========
819
    egon_census_cells = Table(
820
        "egon_destatis_zensus_apartment_building_population_per_ha",
821
        Base.metadata,
822
        schema="society",
823
    )
824
    inspect(engine).reflecttable(egon_census_cells, None)
825
826
    with db.session_scope() as session:
827
        cells_query = session.query(
828
            egon_census_cells.c.zensus_population_id,
829
            egon_census_cells.c.population,
830
            egon_census_cells.c.geom,
831
        ).order_by(egon_census_cells.c.zensus_population_id)
832
        gdf_egon_census_cells = gpd.read_postgis(
833
            cells_query.statement, cells_query.session.bind, geom_col="geom"
834
        )
835
836
    # ========== Get residential buildings ==========
837
    egon_osm_buildings_residential = Table(
838
        "osm_buildings_residential",
839
        Base.metadata,
840
        schema="openstreetmap",
841
    )
842
    inspect(engine).reflecttable(egon_osm_buildings_residential, None)
843
844
    with db.session_scope() as session:
845
        cells_query = session.query(
846
            egon_osm_buildings_residential.c.id.label("building_id"),
847
            egon_osm_buildings_residential.c.geom_building,
848
        ).order_by(egon_osm_buildings_residential.c.id)
849
        gdf_egon_osm_buildings = gpd.read_postgis(
850
            cells_query.statement,
851
            cells_query.session.bind,
852
            geom_col="geom_building",
853
        )
854
855
    # ========== Clip buildings centroids with census cells to get main buildings ==========
856
857
    # Copy buildings and set centroid as geom
858
    gdf_egon_osm_buildings_main = gdf_egon_osm_buildings.copy()
859
    gdf_egon_osm_buildings_main["geom_point"] = (
860
        gdf_egon_osm_buildings_main.centroid
861
    )
862
    gdf_egon_osm_buildings_main = gdf_egon_osm_buildings_main.drop(
863
        columns=["geom_building"]
864
    ).set_geometry("geom_point")
865
866
    egon_map_zensus_buildings_residential_main = gpd.sjoin(
867
        gdf_egon_osm_buildings_main,
868
        gdf_egon_census_cells,
869
        how="inner",
870
        predicate="within",
871
    )[["building_id", "zensus_population_id"]].rename(
872
        columns={"zensus_population_id": "cell_id"}
873
    )
874
875
    # ========== Clip buildings with census cells to get building parts ==========
876
877
    # Clip to create new build parts as buildings
878
    gdf_egon_osm_buildings_census_cells = gdf_egon_census_cells.overlay(
879
        gdf_egon_osm_buildings, how="intersection"
880
    )
881
882
    # Remove main buildings which are not located in populated census cells
883
    buildings_centroid_not_in_census_cells = (
884
        gdf_egon_osm_buildings_census_cells.loc[
885
            ~gdf_egon_osm_buildings_census_cells.building_id.isin(
886
                egon_map_zensus_buildings_residential_main.building_id
887
            )
888
        ]
889
    )
890
    gdf_egon_osm_buildings_census_cells = (
891
        gdf_egon_osm_buildings_census_cells.loc[
892
            ~gdf_egon_osm_buildings_census_cells.building_id.isin(
893
                buildings_centroid_not_in_census_cells.building_id.to_list()
894
            )
895
        ]
896
    )
897
898
    gdf_egon_osm_buildings_census_cells["geom_point"] = (
899
        gdf_egon_osm_buildings_census_cells.centroid
900
    )
901
902
    # Add column with unique building ids using suffixes (building parts split by clipping)
903
    gdf_egon_osm_buildings_census_cells["building_id_temp"] = (
904
        gdf_egon_osm_buildings_census_cells["building_id"].astype(str)
905
    )
906
    g = (
907
        gdf_egon_osm_buildings_census_cells.groupby("building_id_temp")
908
        .cumcount()
909
        .add(1)
910
        .astype(str)
911
    )
912
    gdf_egon_osm_buildings_census_cells["building_id_temp"] += "_" + g
913
914
    # Check
915
    try:
916
        assert len(
917
            gdf_egon_osm_buildings_census_cells.building_id_temp.unique()
918
        ) == len(gdf_egon_osm_buildings_census_cells)
919
    except AssertionError:
920
        print(
921
            "The length of split buildings do not match with original count."
922
        )
923
924
    egon_map_zensus_buildings_residential = (
925
        gdf_egon_osm_buildings_census_cells[
926
            ["zensus_population_id", "building_id_temp"]
927
        ].rename(
928
            columns={
929
                "zensus_population_id": "cell_id",
930
                "building_id_temp": "id",
931
            }
932
        )
933
    )
934
935
    # Get household profile to census cells allocations
936
    with db.session_scope() as session:
937
        cells_query = session.query(HouseholdElectricityProfilesInCensusCells)
938
    egon_hh_profile_in_zensus_cell = pd.read_sql(
939
        cells_query.statement, cells_query.session.bind, index_col=None
940
    )
941
942
    # Match OSM and zensus data to define missing buildings
943
    missing_buildings = match_osm_and_zensus_data(
944
        egon_hh_profile_in_zensus_cell,
945
        egon_map_zensus_buildings_residential,
946
    )
947
948
    # randomly generate synthetic buildings in cell without any
949
    synthetic_buildings = generate_synthetic_buildings(
950
        missing_buildings, edge_length=5
951
    )
952
953
    # add synthetic buildings to df
954
    egon_map_zensus_buildings_residential_synth = pd.concat(
955
        [
956
            egon_map_zensus_buildings_residential,
957
            synthetic_buildings[["id", "cell_id"]],
958
        ],
959
        ignore_index=True,
960
    )
961
962
    # assign profiles to buildings
963
    mapping_profiles_to_buildings = generate_mapping_table(
964
        egon_map_zensus_buildings_residential_synth,
965
        egon_hh_profile_in_zensus_cell,
966
    )
967
968
    # remove suffixes from buildings split into parts before to merge them back together
969
    mapping_profiles_to_buildings["building_id"] = (
970
        mapping_profiles_to_buildings.building_id.astype(str).apply(
971
            lambda s: s.split("_")[0] if "_" in s else s
972
        )
973
    )
974
    mapping_profiles_to_buildings["building_id"] = (
975
        mapping_profiles_to_buildings["building_id"].astype(int)
976
    )
977
978
    # reduce list to only used synthetic buildings
979
    synthetic_buildings = reduce_synthetic_buildings(
980
        mapping_profiles_to_buildings, synthetic_buildings
981
    )
982
    synthetic_buildings["n_amenities_inside"] = 0
983
984
    # ========== Reallocate profiles from building part to main building (correct cell_id) ==========
985
    # cf. https://github.com/openego/eGon-data/issues/1190
986
987
    # Get and allocate main building_id
988
    egon_map_zensus_buildings_residential_main = pd.merge(
989
        mapping_profiles_to_buildings[["cell_id", "building_id"]],
990
        egon_map_zensus_buildings_residential_main,
991
        on="building_id",
992
        how="left",
993
        suffixes=("_df1", "_df2"),
994
    ).dropna()
995
    egon_map_zensus_buildings_residential_main["cell_id_df2"] = (
996
        egon_map_zensus_buildings_residential_main["cell_id_df2"].astype(int)
997
    )
998
    mapping_profiles_to_buildings2 = mapping_profiles_to_buildings.copy()
999
    mapping_profiles_to_buildings["cell_id"] = (
1000
        egon_map_zensus_buildings_residential_main["cell_id_df2"]
1001
    )
1002
1003
    # Retain original values where no main building has been found
1004
    # (centroid of building part not in a cell)
1005
    mapping_profiles_to_buildings["cell_id"].fillna(
1006
        mapping_profiles_to_buildings2["cell_id"], inplace=True
1007
    )
1008
    mapping_profiles_to_buildings["cell_id"] = mapping_profiles_to_buildings[
1009
        "cell_id"
1010
    ].astype(int)
1011
1012
    # ========== Write results to DB ==========
1013
1014
    OsmBuildingsSynthetic.__table__.drop(bind=engine, checkfirst=True)
1015
    OsmBuildingsSynthetic.__table__.create(bind=engine, checkfirst=True)
1016
1017
    # Write new buildings incl coord into db
1018
    n_amenities_inside_type = OsmBuildingsSynthetic.n_amenities_inside.type
1019
    synthetic_buildings.to_postgis(
1020
        "osm_buildings_synthetic",
1021
        con=engine,
1022
        if_exists="append",
1023
        schema="openstreetmap",
1024
        dtype={
1025
            "id": OsmBuildingsSynthetic.id.type,
1026
            "cell_id": OsmBuildingsSynthetic.cell_id.type,
1027
            "geom_building": OsmBuildingsSynthetic.geom_building.type,
1028
            "geom_point": OsmBuildingsSynthetic.geom_point.type,
1029
            "n_amenities_inside": n_amenities_inside_type,
1030
            "building": OsmBuildingsSynthetic.building.type,
1031
            "area": OsmBuildingsSynthetic.area.type,
1032
        },
1033
    )
1034
1035
    HouseholdElectricityProfilesOfBuildings.__table__.drop(
1036
        bind=engine, checkfirst=True
1037
    )
1038
    HouseholdElectricityProfilesOfBuildings.__table__.create(
1039
        bind=engine, checkfirst=True
1040
    )
1041
1042
    # Write building mapping into db
1043
    with db.session_scope() as session:
1044
        session.bulk_insert_mappings(
1045
            HouseholdElectricityProfilesOfBuildings,
1046
            mapping_profiles_to_buildings.to_dict(orient="records"),
1047
        )
1048
1049
1050
def create_buildings_profiles_stats():
1051
    """
1052
    Create DB table `demand.egon_household_electricity_profile_of_buildings_stats`
1053
    with household profile type counts per building
1054
    """
1055
1056
    # Drop and recreate table if existing
1057
    HouseholdElectricityProfilesOfBuildingsStats.__table__.drop(
1058
        bind=engine, checkfirst=True
1059
    )
1060
    HouseholdElectricityProfilesOfBuildingsStats.__table__.create(
1061
        bind=engine, checkfirst=True
1062
    )
1063
1064
    # Query final profile table
1065
    with db.session_scope() as session:
1066
        cells_query = session.query(
1067
            HouseholdElectricityProfilesOfBuildings,
1068
        ).order_by(HouseholdElectricityProfilesOfBuildings.id)
1069
1070
        df_buildings_and_profiles = pd.read_sql(
1071
            cells_query.statement, cells_query.session.bind, index_col="id"
1072
        )
1073
1074
    # Extract household type prefix
1075
    df_buildings_and_profiles = df_buildings_and_profiles.assign(
1076
        household_type=df_buildings_and_profiles.profile_id.str[:2]
1077
    )
1078
1079
    # Unstack and create total
1080
    df_buildings_and_profiles = (
1081
        df_buildings_and_profiles.groupby("building_id")
1082
        .value_counts(["household_type"])
1083
        .unstack(fill_value=0)
1084
    )
1085
    df_buildings_and_profiles["households_total"] = (
1086
        df_buildings_and_profiles.sum(axis=1)
1087
    )
1088
1089
    # Write to DB
1090
    df_buildings_and_profiles.to_sql(
1091
        name=HouseholdElectricityProfilesOfBuildingsStats.__table__.name,
1092
        schema=HouseholdElectricityProfilesOfBuildingsStats.__table__.schema,
1093
        con=engine,
1094
        if_exists="append",
1095
    )
1096
1097
1098
class setup(Dataset):
1099
    """
1100
    Household electricity demand profiles for scenarios in 2035 and 2050
1101
    assigned to buildings.
1102
1103
    Assignment of household electricity demand timeseries to OSM buildings
1104
    and generation of randomly placed synthetic 5x5m buildings if no
1105
    sufficient OSM-data available in the respective census cell.
1106
1107
    For more information see data documentation on :ref:`electricity-demand-ref`.
1108
1109
    *Dependencies*
1110
      * :py:func:`houseprofiles_in_census_cells
1111
        <egon.data.datasets.electricity_demand_timeseries.hh_profiles.houseprofiles_in_census_cells>`
1112
1113
    *Resulting tables*
1114
      * :py:class:`OsmBuildingsSynthetic
1115
        <egon.data.datasets.electricity_demand_timeseries.hh_buildings.OsmBuildingsSynthetic>`
1116
        is created and filled
1117
      * :py:class:`HouseholdElectricityProfilesOfBuildings
1118
        <egon.data.datasets.electricity_demand_timeseries.hh_buildings.HouseholdElectricityProfilesOfBuildings>`
1119
        is created and filled
1120
      * :py:class:`BuildingElectricityPeakLoads
1121
        <egon.data.datasets.electricity_demand_timeseries.hh_buildings.BuildingElectricityPeakLoads>`
1122
        is created and filled
1123
1124
    **The following datasets from the database are used for creation:**
1125
1126
    * `demand.household_electricity_profiles_in_census_cells`:
1127
        Lists references and scaling parameters to time series data for each
1128
        household in a cell by identifiers. This table is fundamental for
1129
        creating subsequent data like demand profiles on MV grid level or
1130
        for determining the peak load at load. Only the profile reference
1131
        and the cell identifiers are used.
1132
1133
    * `society.egon_destatis_zensus_apartment_building_population_per_ha`:
1134
        Lists number of apartments, buildings and population for each census
1135
        cell.
1136
1137
    * `boundaries.egon_map_zensus_buildings_residential`:
1138
        List of OSM tagged buildings which are considered to be residential.
1139
1140
1141
    **What is the goal?**
1142
1143
    To assign every household demand profile allocated each census cell to a
1144
    specific building.
1145
1146
    **What is the challenge?**
1147
1148
    The census and the OSM dataset differ from each other. The census uses
1149
    statistical methods and therefore lacks accuracy at high spatial
1150
    resolution. The OSM dataset is a community based dataset which is
1151
    extended throughout and does not claim to be complete. By merging these
1152
    datasets inconsistencies need to be addressed. For example: not yet
1153
    tagged buildings in OSM or new building areas not considered in census
1154
    2011.
1155
1156
    **How are these datasets combined?**
1157
1158
    The assignment of household demand timeseries to buildings takes place
1159
    at cell level. Within each cell a pool of profiles exists, produced by
1160
    the 'HH Demand" module. These profiles are randomly assigned to a
1161
    filtered list of OSM buildings within this cell. Every profile is
1162
    assigned to a building and every building get a profile assigned if
1163
    there is enough households by the census data. If there are more
1164
    profiles than buildings, all additional profiles are randomly assigned.
1165
    Therefore, multiple profiles can be assigned to one building, making it a
1166
    multi-household building. If there are no OSM buildings available,
1167
    synthetic ones are created (see below).
1168
1169
    **What are central assumptions during the data processing?**
1170
1171
    * Mapping zensus data to OSM data is not trivial.
1172
      Discrepancies are substituted.
1173
    * Missing OSM buildings are generated by census building count.
1174
    * If no census building count data is available, the number of buildings
1175
      is derived by an average rate of households/buildings applied to the
1176
      number of households.
1177
1178
    **Drawbacks and limitations of the data**
1179
1180
    * Missing OSM buildings in cells without census building count are
1181
      derived by an average (median) rate of households/buildings applied
1182
      to the number of households. We use a twofold approach for the rate:
1183
      first, the rate is calculated using adjacent cells (function
1184
      `find_adjacent_cells()`), a distance of 3 cells in each direction is
1185
      used by default (resulting in a 7x7 lookup matrix). For the remaining
1186
      cells, i.e. cells without any rate in the adjacent cells, the global
1187
      median rate is used.
1188
1189
      As only whole houses can exist, the substitute is ceiled to the next
1190
      higher integer. Ceiling is applied to avoid rounding to amount of 0
1191
      buildings.
1192
1193
    * As this dataset uses the load profile assignment at census cell level
1194
      conducted in hh_profiles.py, also check drawbacks and limitations in that module.
1195
1196
    **Example Query**
1197
1198
    * Get a list with number of houses, households and household types per
1199
      census cell
1200
1201
    .. code-block:: SQL
1202
1203
        SELECT t1.cell_id, building_count, hh_count, hh_types FROM (
1204
            SELECT
1205
                cell_id,
1206
                COUNT(DISTINCT(building_id)) AS building_count,
1207
                COUNT(profile_id) AS hh_count
1208
            FROM demand.egon_household_electricity_profile_of_buildings
1209
            GROUP BY cell_id
1210
        ) AS t1
1211
        FULL OUTER JOIN (
1212
            SELECT
1213
                cell_id,
1214
                array_agg(
1215
                    array[CAST(hh_10types AS char), hh_type]
1216
                ) AS hh_types
1217
            FROM society.egon_destatis_zensus_household_per_ha_refined
1218
            GROUP BY cell_id
1219
        ) AS t2
1220
        ON t1.cell_id = t2.cell_id
1221
1222
    """
1223
1224
    #:
1225
    name: str = "Demand_Building_Assignment"
1226
    #:
1227
    version: str = "0.0.7"
1228
    #:
1229
    tasks = (
1230
        map_houseprofiles_to_buildings,
1231
        create_buildings_profiles_stats,
1232
        get_building_peak_loads,
1233
    )
1234
1235
    def __init__(self, dependencies):
1236
        super().__init__(
1237
            name=self.name,
1238
            version=self.version,
1239
            dependencies=dependencies,
1240
            tasks=self.tasks,
1241
            validation={
1242
                "data_quality": [
1243
                    RowCountValidation(
1244
                        table=" demand.egon_building_electricity_peak_loads",
1245
                        rule_id="ROW_COUNT.egon_building_electricity_peak_loads",
1246
                        expected_count={"Schleswig-Holstein": 3054820, "Everything": 44683620}
1247
                    ),
1248
                    DataTypeValidation(
1249
                        table="demand.egon_building_electricity_peak_loads",
1250
                        rule_id="DATA_MULTIPLE_TYPES.egon_building_electricity_peak_loads",
1251
                        column_types={"building_id": "integer", "scenario": "character varying", "sector": "character varying", "peak_load_in_w": "real", "voltage_level": "integer"}
1252
                    ),
1253
                    WholeTableNotNullAndNotNaNValidation(
1254
                        table="demand.egon_building_electricity_peak_loads",
1255
                        rule_id="WHOLE_TABLE_NOT_NAN.egon_building_electricity_peak_loads"
1256
                    ),
1257
                    ValueSetValidation(
1258
                        table="demand.egon_building_electricity_peak_loads",
1259
                        rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_building_electricity_peak_loads",
1260
                        column="scenario",
1261
                        expected_values=["eGon2035", "eGon100RE"]
1262
                    ),
1263
                    ValueSetValidation(
1264
                        table="demand.egon_building_electricity_peak_loads",
1265
                        rule_id="VALUE_SET_VALIDATION_SECTOR.egon_building_electricity_peak_loads",
1266
                        column="sector",
1267
                        expected_values=["cts", "residential"]
1268
                    ),
1269
                    RowCountValidation(
1270
                        table=" demand.egon_building_heat_peak_loads",
1271
                        rule_id="ROW_COUNT.egon_building_heat_peak_loads",
1272
                        expected_count={"Schleswig-Holstein": 732905, "Everything": 42128819}
1273
                    ),
1274
                    DataTypeValidation(
1275
                        table="demand.egon_building_heat_peak_loads",
1276
                        rule_id="DATA_MULTIPLE_TYPES.egon_building_heat_peak_loads",
1277
                        column_types={"building_id": "integer", "scenario": "character varying", "sector": "character varying", "peak_load_in_w": "real"}
1278
                    ),
1279
                    WholeTableNotNullAndNotNaNValidation(
1280
                        table="demand.egon_building_heat_peak_loads",
1281
                        rule_id="WHOLE_TABLE_NOT_NAN.egon_building_heat_peak_loads"
1282
                    ),
1283
                    ValueSetValidation(
1284
                        table="demand.egon_building_heat_peak_loads",
1285
                        rule_id="VALUE_SET_VALIDATION_SCENARIO.egon_building_heat_peak_loads",
1286
                        column="scenario",
1287
                        expected_values=["eGon2035", "eGon100RE"]
1288
                    ),
1289
                    ValueSetValidation(
1290
                        table="demand.egon_building_heat_peak_loads",
1291
                        rule_id="VALUE_SET_VALIDATION_SECTOR.egon_building_heat_peak_loads",
1292
                        column="sector",
1293
                        expected_values=["residential+cts"]
1294
                    ),
1295
                    RowCountValidation(
1296
                        table=" demand.egon_household_electricity_profile_of_buildings",
1297
                        rule_id="ROW_COUNT.egon_household_electricity_profile_of_buildings",
1298
                        expected_count={"Schleswig-Holstein": 1371592, "Everything": 38605221}
1299
                    ),
1300
                    DataTypeValidation(
1301
                        table="demand.egon_household_electricity_profile_of_buildings",
1302
                        rule_id="DATA_MULTIPLE_TYPES.egon_household_electricity_profile_of_buildings",
1303
                        column_types={"id": "integer", "building_id": "integer", "cell_id": "integer",
1304
                                      "profile_id": "character varying"}
1305
                    ),
1306
                    WholeTableNotNullAndNotNaNValidation(
1307
                        table="demand.egon_household_electricity_profile_of_buildings",
1308
                        rule_id="WHOLE_TABLE_NOT_NAN.egon_household_electricity_profile_of_buildings"
1309
                    ),
1310
                ]
1311
            },
1312
            on_validation_failure="continue"
1313
        )
1314