Passed
Pull Request — dev (#1008)
by
unknown
01:40
created

allocate_scenarios()   B

Complexity

Conditions 1

Size

Total Lines 104
Code Lines 58

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 58
dl 0
loc 104
rs 8.3745
c 0
b 0
f 0
cc 1
nop 4

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

"""
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate new
PV rooftop generators for the scenarios eGon2035 and eGon100RE.

Data cleaning: Drop duplicates and entries with missing critical data. Determine the
most plausible capacity from the multiple values given in the MaStR data. Drop
generators which don't have any plausible capacity data (23.5 MW > P > 0.1). Randomly
and weighted add a start-up date if it is missing. Extract zip code and municipality
from the 'Standort' given in the MaStR data. Geocode unique zip code and municipality
combinations with Nominatim (1 sec delay). Drop generators for which geocoding failed
or which are located outside the municipalities of Germany. Add some visual sanity
checks for the cleaned data.

Allocation of MaStR data: Allocate each generator to an existing building from OSM.
Determine the quantile each generator and building is in, depending on the capacity of
the generator and the area of the building polygon. Randomly distribute generators
within each municipality, preferably within the same building-area quantile as the
generators are capacity-wise. If not enough buildings exist within a municipality and
quantile, additional buildings from other quantiles are chosen randomly.

Disaggregation of PV rooftop scenarios: The scenario data per federal state is linearly
distributed to the MV grid districts according to the PV rooftop potential per MV grid
district. The rooftop potential is estimated from the building area given by the OSM
buildings. Grid districts which are located in several federal states are allocated PV
capacity according to their respective roof potential in the individual federal states.
The disaggregation of PV plants within a grid district respects existing plants from
MaStR which did not reach their end of life. New PV plants are randomly and weighted
generated using a breakdown of MaStR data as generator basis. Plant metadata (e.g.
plant orientation) is also added randomly and weighted from MaStR data as basis.
"""
27
from __future__ import annotations
28
29
from collections import Counter
30
from functools import wraps
31
from pathlib import Path
32
from time import perf_counter
33
from typing import Any
34
35
from geoalchemy2 import Geometry
36
from geopy.extra.rate_limiter import RateLimiter
37
from geopy.geocoders import Nominatim
38
from loguru import logger
39
from numpy.random import RandomState, default_rng
40
from pyproj.crs.crs import CRS
41
from sqlalchemy import BigInteger, Column, Float, Integer, String
42
from sqlalchemy.dialects.postgresql import HSTORE
43
from sqlalchemy.ext.declarative import declarative_base
44
import geopandas as gpd
45
import numpy as np
46
import pandas as pd
47
48
from egon.data import config, db
49
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
50
    OsmBuildingsSynthetic,
51
)
52
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
53
from egon.data.datasets.zensus_vg250 import Vg250Gem
54
55
engine = db.engine()
Base = declarative_base()
# Global random seed taken from the egon-data CLI configuration.
SEED = int(config.settings()["egon-data"]["--random-seed"])

# TODO: move to yml
# MaStR source columns to read from the CSV export.
MASTR_RELEVANT_COLS = [
    "EinheitMastrNummer",
    "Bruttoleistung",
    "StatisikFlag",
    "Bruttoleistung_extended",
    "Nettonennleistung",
    "InstallierteLeistung",
    "zugeordneteWirkleistungWechselrichter",
    "EinheitBetriebsstatus",
    "Standort",
    "Bundesland",
    "Land",
    "Landkreis",
    "Gemeinde",
    "Postleitzahl",
    "Ort",
    "GeplantesInbetriebnahmedatum",
    "Inbetriebnahmedatum",
    "GemeinsamerWechselrichterMitSpeicher",
    "Lage",
    "Leistungsbegrenzung",
    "EinheitlicheAusrichtungUndNeigungswinkel",
    "Hauptausrichtung",
    "HauptausrichtungNeigungswinkel",
    "Nebenausrichtung",
]

# Explicit dtypes for the CSV read; commented entries document why a
# stricter dtype cannot be used.
MASTR_DTYPES = {
    "EinheitMastrNummer": str,
    "Bruttoleistung": float,
    "StatisikFlag": str,
    "Bruttoleistung_extended": float,
    "Nettonennleistung": float,
    "InstallierteLeistung": float,
    "zugeordneteWirkleistungWechselrichter": float,
    "EinheitBetriebsstatus": str,
    "Standort": str,
    "Bundesland": str,
    "Land": str,
    "Landkreis": str,
    "Gemeinde": str,
    # "Postleitzahl": int,  # fails because of nan values
    "Ort": str,
    "GemeinsamerWechselrichterMitSpeicher": str,
    "Lage": str,
    "Leistungsbegrenzung": str,
    # this would parse nan values as false which is not always correct
    # "EinheitlicheAusrichtungUndNeigungswinkel": bool,
    "Hauptausrichtung": str,
    "HauptausrichtungNeigungswinkel": str,
    "Nebenausrichtung": str,
    "NebenausrichtungNeigungswinkel": str,
}

MASTR_PARSE_DATES = [
    "GeplantesInbetriebnahmedatum",
    "Inbetriebnahmedatum",
]

MASTR_INDEX_COL = "EinheitMastrNummer"

EPSG = 4326
SRID = 3035

# data cleaning
MAX_REALISTIC_PV_CAP = 23500
MIN_REALISTIC_PV_CAP = 0.1
ROUNDING = 1

# geopy
MIN_DELAY_SECONDS = 1
USER_AGENT = "rli_kh_geocoder"

# show additional logging information
VERBOSE = False

# BUG FIX: was Path(__name__), which resolves relative to the module *name*
# in the current working directory. Path(__file__) anchors the export
# directory next to this source file as intended.
EXPORT_DIR = Path(__file__).resolve().parent / "data"
EXPORT_FILE = "mastr_geocoded.gpkg"
EXPORT_PATH = EXPORT_DIR / EXPORT_FILE
DRIVER = "GPKG"

# Number of quantiles
Q = 5

# Scenario Data
CARRIER = "solar_rooftop"
SCENARIOS = ["eGon2035", "eGon100RE"]
SCENARIO_TIMESTAMP = {
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
}
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")

# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
MODUL_CAP = 0.4  # kWp
MODUL_SIZE = 1.096 * 1.754  # m²
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE

# Estimation of usable roof area
# Factor for the conversion of building area to roof area
# estimation mean roof pitch: 35°
# estimation usable roof share: 80%
# estimation that only the south side of the building is used for pv
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
# AREA_FACTOR = 1.221
# USABLE_ROOF_SHARE = 0.8
# SOUTH_SHARE = 0.5
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
ROOF_FACTOR = 0.5

CAP_RANGES = [
    (0, 30),
    (30, 100),
    (100, float("inf")),
]

MIN_BUILDING_SIZE = 10.0
# NOTE(review): name is misspelled ("QUNATILE") but kept for backward
# compatibility — other modules may reference it by this name.
UPPER_QUNATILE = 0.95
LOWER_QUANTILE = 0.05

COLS_TO_RENAME = {
    "EinheitlicheAusrichtungUndNeigungswinkel": (
        "einheitliche_ausrichtung_und_neigungswinkel"
    ),
    "Hauptausrichtung": "hauptausrichtung",
    "HauptausrichtungNeigungswinkel": "hauptausrichtung_neigungswinkel",
}

COLS_TO_EXPORT = [
    "scenario",
    "building_id",
    "gens_id",
    "capacity",
    "einheitliche_ausrichtung_und_neigungswinkel",
    "hauptausrichtung",
    "hauptausrichtung_neigungswinkel",
    "voltage_level",
]

# TODO
INCLUDE_SYNTHETIC_BUILDINGS = True
ONLY_BUILDINGS_WITH_DEMAND = True
TEST_RUN = False
205
206
207
def timer_func(func):
    """Decorator that logs the wall-clock runtime of the wrapped function."""

    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        # perf_counter is the highest-resolution monotonic clock available.
        tic = perf_counter()
        result = func(*args, **kwargs)
        elapsed = perf_counter() - tic
        logger.debug(f"Function {func.__name__} took {elapsed:.4f} seconds.")
        return result

    return timeit_wrapper
220
221
222
@timer_func
def mastr_data(
    index_col: str | int | list[str] | list[int],
    usecols: list[str],
    dtype: dict[str, Any] | None,
    parse_dates: list[str] | None,
) -> pd.DataFrame:
    """
    Read MaStR data from csv.

    Parameters
    ----------
    index_col : str, int or list of str or int
        Column(s) to use as the row labels of the DataFrame.
    usecols : list of str
        Return a subset of the columns.
    dtype : dict of column (str) -> type (any), optional
        Data type for data or columns.
    parse_dates : list of names (str), optional
        Try to parse given columns to datetime.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing MaStR data filtered to operational rooftop
        units within Germany (optionally restricted to Schleswig-Holstein).
    """
    mastr_path = Path(
        config.datasets()["power_plants"]["sources"]["mastr_pv"]
    ).resolve()

    mastr_df = pd.read_csv(
        mastr_path,
        index_col=index_col,
        usecols=usecols,
        dtype=dtype,
        parse_dates=parse_dates,
    )

    # keep only operational rooftop units located in Germany
    keep = (
        (mastr_df.StatisikFlag == "B")
        & (mastr_df.EinheitBetriebsstatus == "InBetrieb")
        & (mastr_df.Land == "Deutschland")
        & (mastr_df.Lage == "BaulicheAnlagen")
    )
    mastr_df = mastr_df.loc[keep]

    boundary = config.settings()["egon-data"]["--dataset-boundary"]

    if boundary == "Schleswig-Holstein":
        init_len = len(mastr_df)
        mastr_df = mastr_df.loc[mastr_df.Bundesland == "SchleswigHolstein"]
        logger.info(
            f"Using only MaStR data within Schleswig-Holstein. "
            f"{init_len - len(mastr_df)} of {init_len} generators are dropped."
        )

    logger.debug("MaStR data loaded.")

    return mastr_df
282
283
284
@timer_func
def clean_mastr_data(
    mastr_df: pd.DataFrame,
    max_realistic_pv_cap: int | float,
    min_realistic_pv_cap: int | float,
    rounding: int,
    seed: int,
) -> pd.DataFrame:
    """
    Clean the MaStR data from implausible data.

    * Drop MaStR ID duplicates.
    * Drop generators with implausible capacities.
    * Drop generators without any kind of start-up date.
    * Clean up Standort column and capacity.

    Parameters
    -----------
    mastr_df : pandas.DataFrame
        DataFrame containing MaStR data.
    max_realistic_pv_cap : int or float
        Maximum capacity, which is considered to be realistic.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.
    rounding : int
        Rounding to use when cleaning up capacity. E.g. when
        rounding is 1 a capacity of 9.93 will be rounded to 9.9.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    """
    init_len = len(mastr_df)

    # drop duplicates (keep the first occurrence of each MaStR ID)
    mastr_df = mastr_df.loc[~mastr_df.index.duplicated()]

    # drop invalid entries in standort
    # NOTE(review): isna() and isnull() are aliases in pandas, so this
    # condition is redundant — it only drops rows with a missing Standort.
    index_to_drop = mastr_df.loc[
        (mastr_df.Standort.isna()) | (mastr_df.Standort.isnull())
    ].index

    mastr_df = mastr_df.loc[~mastr_df.index.isin(index_to_drop)]

    # collect the five alternative capacity ratings per generator
    df = mastr_df[
        [
            "Bruttoleistung",
            "Bruttoleistung_extended",
            "Nettonennleistung",
            "zugeordneteWirkleistungWechselrichter",
            "InstallierteLeistung",
        ]
    ].round(rounding)

    # use only the smallest capacity rating if multiple are given
    mastr_df = mastr_df.assign(
        capacity=[
            most_plausible(p_tub, min_realistic_pv_cap)
            for p_tub in df.itertuples(index=False)
        ]
    )

    # drop generators without any capacity info
    # and capacity of zero
    # and if the capacity is > 23.5 MW, because
    # Germany's largest rooftop PV is 23 MW
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
    mastr_df = mastr_df.loc[
        (~mastr_df.capacity.isna())
        & (mastr_df.capacity <= max_realistic_pv_cap)
        & (mastr_df.capacity > min_realistic_pv_cap)
    ]

    # get zip and municipality; the helper returns (cleaned_string, ok_flag)
    mastr_df[["zip_and_municipality", "drop_this"]] = pd.DataFrame(
        mastr_df.Standort.astype(str)
        .apply(
            zip_and_municipality_from_standort,
            args=(VERBOSE,),
        )
        .tolist(),
        index=mastr_df.index,
    )

    # drop invalid entries (rows where no zip code could be identified)
    mastr_df = mastr_df.loc[mastr_df.drop_this].drop(columns="drop_this")

    # add ", Deutschland" just in case
    mastr_df = mastr_df.assign(
        zip_and_municipality=(mastr_df.zip_and_municipality + ", Deutschland")
    )

    # get consistent start-up date
    mastr_df = mastr_df.assign(
        start_up_date=mastr_df.Inbetriebnahmedatum,
    )

    # fall back to the planned start-up date where the actual one is missing
    # NOTE(review): this row-wise .loc assignment replaces whole rows of the
    # masked subset (all columns), not only start_up_date — verify intent.
    mastr_df.loc[mastr_df.start_up_date.isna()] = mastr_df.loc[
        mastr_df.start_up_date.isna()
    ].assign(
        start_up_date=mastr_df.GeplantesInbetriebnahmedatum.loc[
            mastr_df.start_up_date.isna()
        ]
    )

    # randomly and weighted fill missing start-up dates
    # NOTE(review): no explicit weights are passed to np.random.choice, so
    # the draw is uniform over the pool (duplicates in the pool implicitly
    # weight frequent dates); replace=False raises if size > len(pool) —
    # confirm this is the intended behavior.
    pool = mastr_df.loc[
        ~mastr_df.start_up_date.isna()
    ].start_up_date.to_numpy()

    size = len(mastr_df) - len(pool)

    if size > 0:
        np.random.seed(seed)

        choice = np.random.choice(
            pool,
            size=size,
            replace=False,
        )

        mastr_df.loc[mastr_df.start_up_date.isna()] = mastr_df.loc[
            mastr_df.start_up_date.isna()
        ].assign(start_up_date=choice)

        logger.info(
            f"Randomly and weigthed added start-up date to {size} generators."
        )

    # normalize all start-up dates to timezone-aware UTC timestamps
    mastr_df = mastr_df.assign(
        start_up_date=pd.to_datetime(mastr_df.start_up_date, utc=True)
    )

    end_len = len(mastr_df)
    logger.debug(
        f"Dropped {init_len - end_len} "
        f"({((init_len - end_len) / init_len) * 100:g}%)"
        f" of {init_len} rows from MaStR DataFrame."
    )

    return mastr_df
427
428
429
def zip_and_municipality_from_standort(
    standort: str,
    verbose: bool = False,
) -> tuple[str, bool]:
    """
    Get zip code and municipality from Standort string.

    Searches the whitespace-separated tokens of `standort` for the first
    five-digit token (a German zip code) and returns everything from that
    token onwards.

    Parameters
    -----------
    standort : str
        Standort as given from MaStR data.
    verbose : bool
        Logs additional info if True.

    Returns
    -------
    tuple of str and bool
        The cleaned Standort (zip code and municipality) and a flag
        indicating whether a zip code was found. If no zip code was found,
        the original string is returned unchanged together with False.
    """
    if verbose:
        logger.debug(f"Uncleaned String: {standort}")

    standort_list = standort.split()

    found = False
    count = 0

    for count, elem in enumerate(standort_list):
        # a German zip code is exactly five digits
        if len(elem) == 5 and elem.isnumeric():
            found = True
            break

    if found:
        # keep everything from the zip code onwards
        cleaned_str = " ".join(standort_list[count:])

        if verbose:
            logger.debug(f"Cleaned String:   {cleaned_str}")

        return cleaned_str, found

    logger.warning(
        "Couldn't identify zip code. This entry will be dropped."
        f" Original standort: {standort}."
    )

    return standort, found
479
480
481
def most_plausible(
    p_tub: tuple,
    min_realistic_pv_cap: int | float,
) -> float:
    """
    Try to determine the most plausible capacity.

    Considers the (up to three) most common capacity values of a MaStR
    generator entry. If the smallest of them is below the realistic
    minimum, it is assumed to be bogus and the largest is returned;
    otherwise the smallest value is used.

    Parameters
    -----------
    p_tub : tuple
        Tuple containing the different capacities given in
        the MaStR data.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.

    Returns
    -------
    float
        Capacity of the generator estimated as the most realistic.
    """
    candidates = [value for value, _ in Counter(p_tub).most_common(3)]

    smallest = min(candidates)
    largest = max(candidates)

    # an implausibly small value is discarded in favor of the largest one
    if smallest < min_realistic_pv_cap:
        return largest

    return smallest
522
523
524
def geocoder(
    user_agent: str,
    min_delay_seconds: int,
) -> RateLimiter:
    """
    Set up a rate-limited Nominatim geocoding callable.

    Parameters
    -----------
    user_agent : str
        The app name.
    min_delay_seconds : int
        Delay in seconds to use between requests to Nominatim.
        A minimum of 1 is advised.

    Returns
    -------
    geopy.extra.rate_limiter.RateLimiter
        Nominatim RateLimiter geocoding class to use for geocoding.
    """
    return RateLimiter(
        Nominatim(user_agent=user_agent).geocode,
        min_delay_seconds=min_delay_seconds,
    )
547
548
549
def geocoding_data(
    clean_mastr_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Build the DataFrame of unique locations to geocode.

    Parameters
    -----------
    clean_mastr_df : pandas.DataFrame
        DataFrame containing cleaned MaStR data.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing all unique combinations of
        zip codes with municipalities for geocoding.
    """
    unique_locations = clean_mastr_df.zip_and_municipality.unique()

    return pd.DataFrame({"zip_and_municipality": unique_locations})
568
569
570
@timer_func
def geocode_data(
    geocoding_df: pd.DataFrame,
    ratelimiter: RateLimiter,
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Geocode zip code and municipality.

    Extract latitude, longitude and altitude from the geocoding result.
    Transform latitude and longitude to a shapely Point and return a
    geopandas GeoDataFrame.

    Parameters
    -----------
    geocoding_df : pandas.DataFrame
        DataFrame containing all unique combinations of
        zip codes with municipalities for geocoding.
    ratelimiter : geopy.extra.rate_limiter.RateLimiter
        Nominatim RateLimiter geocoding class to use for geocoding.
    epsg : int
        EPSG ID to use as CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing all unique combinations of
        zip codes with municipalities with matching geolocation.
    """
    logger.info(f"Geocoding {len(geocoding_df)} locations.")

    # query Nominatim for every unique location (rate-limited)
    located_df = geocoding_df.assign(
        location=geocoding_df.zip_and_municipality.apply(ratelimiter)
    )

    # a failed lookup yields None -> point stays None
    located_df["point"] = located_df.location.apply(
        lambda loc: tuple(loc.point) if loc else None
    )

    located_df[["latitude", "longitude", "altitude"]] = pd.DataFrame(
        located_df.point.tolist(), index=located_df.index
    )

    return gpd.GeoDataFrame(
        located_df,
        geometry=gpd.points_from_xy(located_df.longitude, located_df.latitude),
        crs=f"EPSG:{epsg}",
    )
617
618
619
def merge_geocode_with_mastr(
    clean_mastr_df: pd.DataFrame, geocode_gdf: gpd.GeoDataFrame
) -> gpd.GeoDataFrame:
    """
    Merge geocoded geometry back onto the original MaStR data.

    Parameters
    -----------
    clean_mastr_df : pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    geocode_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing all unique combinations of
        zip codes with municipalities with matching geolocation.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing cleaned MaStR data with
        matching geolocation from geocoding.
    """
    # left join keeps every generator, even those without a geocoding hit
    merged_df = clean_mastr_df.merge(
        geocode_gdf[["zip_and_municipality", "geometry"]],
        how="left",
        on="zip_and_municipality",
    )

    # restore the MaStR ID index lost by merge()
    return gpd.GeoDataFrame(merged_df, crs=geocode_gdf.crs).set_index(
        clean_mastr_df.index
    )
646
647
648
def drop_invalid_entries_from_gdf(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop rows with invalid geometries from a geopandas GeoDataFrame.

    TODO: how to omit the logging from geos here???

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame to be checked for validity.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with rows with invalid geometries
        dropped.
    """
    valid_gdf = gdf.loc[gdf.is_valid]

    dropped = len(gdf) - len(valid_gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / len(gdf) * 100:g}%) "
        f"of {len(gdf)} values were invalid and are dropped."
    )

    return valid_gdf
673
674
675
@timer_func
def municipality_data() -> gpd.GeoDataFrame:
    """
    Get municipality data from eGo^n Database.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with municipality data, indexed by AGS.
    """
    with db.session_scope() as session:
        gem_query = session.query(
            Vg250Gem.ags, Vg250Gem.geometry.label("geom")
        )

    return gpd.read_postgis(
        gem_query.statement,
        gem_query.session.bind,
        index_col="ags",
    )
690
691
692
@timer_func
def add_ags_to_gens(
    valid_mastr_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add the AGS ID of the containing municipality to each generator.

    Parameters
    -----------
    valid_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with AGS ID added.
    """
    # spatial left join keeps generators without a matching municipality
    joined_gdf = valid_mastr_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # the municipality index (AGS) arrives as "index_right"
    return joined_gdf.rename(columns={"index_right": "ags"})
716
717
718
def drop_gens_outside_muns(
    valid_mastr_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all generators outside of municipalities.

    Parameters
    -----------
    valid_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with generators without an AGS ID dropped.
    """
    gdf = valid_mastr_gdf.loc[~valid_mastr_gdf.ags.isna()]

    total = len(valid_mastr_gdf)
    dropped = total - len(gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the municipalities"
        " and are therefore dropped."
    )

    return gdf
743
744
745
class EgonMastrPvRoofGeocoded(Base):
    """ORM table supply.egon_mastr_pv_roof_geocoded holding geocoded MaStR PV rooftop locations."""

    __tablename__ = "egon_mastr_pv_roof_geocoded"
    __table_args__ = {"schema": "supply"}

    # unique "zip code municipality, Deutschland" string used as geocoding key
    zip_and_municipality = Column(String, primary_key=True, index=True)
    # raw Nominatim location result, stored as string
    location = Column(String)
    # (latitude, longitude, altitude) tuple, stored as string
    point = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    altitude = Column(Float)
    # point geometry built from longitude/latitude (EPSG constant above)
    geometry = Column(Geometry(srid=EPSG))
756
757
758
def create_geocoded_table(geocode_gdf):
    """
    Create and fill the geocoded MaStR PV rooftop table.

    Parameters
    -----------
    geocode_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoding information on pv rooftop locations.
    """
    table = EgonMastrPvRoofGeocoded.__table__

    # recreate the table from scratch so re-runs do not append duplicates
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    geocode_gdf.to_postgis(
        name=table.name,
        schema=table.schema,
        con=db.engine(),
        if_exists="append",
        index=False,
    )
777
778
779
def geocoded_data_from_db(
    epsg: str | int,
) -> gpd.GeoDataFrame:
    """
    Read geocoded zip code/municipality locations from eGo^n Database.

    Parameters
    -----------
    epsg : str or int
        EPSG ID of the CRS to transform the geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing the geocoded zip code and municipality
        combinations with their point geometry.
    """
    with db.session_scope() as session:
        query = session.query(
            EgonMastrPvRoofGeocoded.zip_and_municipality,
            EgonMastrPvRoofGeocoded.geometry,
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, geom_col="geometry"
    ).to_crs(f"EPSG:{epsg}")
802
803
804
def load_mastr_data():
    """Read PV rooftop data from MaStR CSV

    Note: the source will be replaced as soon as the MaStR data is available
    in DB.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    """
    # read and pre-filter the raw CSV
    raw_mastr_df = mastr_data(
        MASTR_INDEX_COL,
        MASTR_RELEVANT_COLS,
        MASTR_DTYPES,
        MASTR_PARSE_DATES,
    )

    # drop duplicates/implausible capacities, normalize start-up dates
    cleaned_df = clean_mastr_data(
        raw_mastr_df,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
        rounding=ROUNDING,
    )

    # attach geocoded point geometries and drop unusable entries
    geocode_gdf = geocoded_data_from_db(EPSG)
    mastr_gdf = merge_geocode_with_mastr(cleaned_df, geocode_gdf)
    valid_gdf = drop_invalid_entries_from_gdf(mastr_gdf)

    # assign each generator to its municipality (AGS)
    valid_gdf = add_ags_to_gens(valid_gdf, municipality_data())

    return drop_gens_outside_muns(valid_gdf)
839
840
841
class OsmBuildingsFiltered(Base):
    """ORM table openstreetmap.osm_buildings_filtered of filtered OSM buildings."""

    __tablename__ = "osm_buildings_filtered"
    __table_args__ = {"schema": "openstreetmap"}

    # original OSM object ID
    osm_id = Column(BigInteger)
    amenity = Column(String)
    building = Column(String)
    name = Column(String)
    # building footprint geometry (SRID constant above)
    geom = Column(Geometry(srid=SRID), index=True)
    # footprint area; presumably m² — TODO confirm against table DDL
    area = Column(Float)
    # point representation of the building, used as generator location
    geom_point = Column(Geometry(srid=SRID), index=True)
    # raw OSM tags
    tags = Column(HSTORE)
    id = Column(BigInteger, primary_key=True, index=True)
854
855
856
@timer_func
def osm_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read OSM buildings data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data, indexed by building ID.
    """
    with db.session_scope() as session:
        buildings_query = session.query(
            OsmBuildingsFiltered.id,
            OsmBuildingsFiltered.area,
            OsmBuildingsFiltered.geom_point.label("geom"),
        )

    buildings_gdf = gpd.read_postgis(
        buildings_query.statement,
        buildings_query.session.bind,
        index_col="id",
    )

    return buildings_gdf.to_crs(to_crs)
881
882
883
@timer_func
def synthetic_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read synthetic buildings data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing synthetic buildings data, indexed by
        building ID.
    """
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsSynthetic.id,
            OsmBuildingsSynthetic.area,
            OsmBuildingsSynthetic.geom_point.label("geom"),
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
908
909
910
@timer_func
def add_ags_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add the AGS ID of the containing municipality to each building.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with AGS ID added.
    """
    # spatial left join keeps buildings without a matching municipality
    joined_gdf = buildings_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # the municipality index (AGS) arrives as "index_right"
    return joined_gdf.rename(columns={"index_right": "ags"})
934
935
936
def drop_buildings_outside_muns(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of municipalities.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without an AGS ID dropped.
    """
    gdf = buildings_gdf.loc[~buildings_gdf.ags.isna()]

    total = len(buildings_gdf)
    dropped = total - len(gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the municipalities "
        "and are therefore dropped."
    )

    return gdf
961
962
963
def egon_building_peak_loads():
    """
    Get IDs of buildings with an electricity peak load in scenario eGon2035.

    Returns
    -------
    numpy.ndarray
        Sorted array of unique building IDs (int).
    """
    sql = """
    SELECT building_id
    FROM demand.egon_building_electricity_peak_loads
    WHERE scenario = 'eGon2035'
    """

    return (
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
    )
973
974
975
@timer_func
def load_building_data():
    """
    Read buildings from DB
    Tables:

    * `openstreetmap.osm_buildings_filtered` (from OSM)
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)

    Use column `id` for both as it is unique hence you concat both datasets. If
    INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be loaded.

    Returns
    -------
    gepandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an AGS ID
        dropped.
    """

    municipalities_gdf = municipality_data()

    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)

    if INCLUDE_SYNTHETIC_BUILDINGS:
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)

        # Concatenate OSM and synthetic buildings into one frame; both use
        # the `geom` geometry column, and the index is cast to int so both
        # datasets share a uniform integer building ID.
        buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [
                    osm_buildings_gdf,
                    synthetic_buildings_gdf,
                ]
            ),
            geometry="geom",
            crs=osm_buildings_gdf.crs,
        ).rename(columns={"area": "building_area"})

        buildings_gdf.index = buildings_gdf.index.astype(int)

    else:
        buildings_gdf = osm_buildings_gdf.rename(
            columns={"area": "building_area"}
        )

    if ONLY_BUILDINGS_WITH_DEMAND:
        building_ids = egon_building_peak_loads()

        init_len = len(building_ids)

        # Keep only buildings that both have a peak load and exist in the
        # concatenated buildings frame.
        building_ids = np.intersect1d(
            list(map(int, building_ids)),
            list(map(int, buildings_gdf.index.to_numpy())),
        )

        end_len = len(building_ids)

        logger.debug(
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) of buildings have "
            f"peak load."
        )

        buildings_gdf = buildings_gdf.loc[building_ids]

    # Attach municipality (AGS) IDs and drop buildings outside municipalities.
    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)

    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)

    # Build the overlay of mv grid districts with federal states and tag
    # each building with the overlay shape it falls into.
    grid_districts_gdf = grid_districts(EPSG)

    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)

    grid_federal_state_gdf = overlay_grid_districts_with_counties(
        grid_districts_gdf,
        federal_state_gdf,
    )

    buildings_overlay_gdf = add_overlay_id_to_buildings(
        buildings_ags_gdf,
        grid_federal_state_gdf,
    )

    logger.debug("Loaded buildings.")

    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)

    # overwrite bus_id with data from new table
    sql = "SELECT building_id, bus_id FROM boundaries.egon_map_zensus_mvgd_buildings"
    map_building_bus_df = db.select_dataframe(sql)

    # Keep only buildings present in the authoritative building->bus mapping.
    building_ids = np.intersect1d(
        list(map(int, map_building_bus_df.building_id.unique())),
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
    )

    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]

    # Replace the overlay-derived bus_id column by merging in the mapping
    # table, then drop the redundant building_id merge key.
    return (
        buildings_within_gdf.drop(columns=["bus_id"])
        .merge(
            how="left",
            right=map_building_bus_df,
            left_index=True,
            right_on="building_id",
        )
        .drop(columns=["building_id"])
    )
1081
1082
1083
@timer_func
def sort_and_qcut_df(
    df: pd.DataFrame | gpd.GeoDataFrame,
    col: str,
    q: int,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Determine the quantile of a given attribute in a (Geo)DataFrame.

    The frame is sorted in ascending order by the given attribute and a
    ``quant`` column holding the quantile label (0 .. q-1) is added.

    Parameters
    -----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        (Geo)DataFrame to sort and qcut.
    col : str
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
    q : int
        Number of quantiles.
    Returns
    -------
    pandas.DataFrame or gepandas.GeoDataFrame
        Sorted and qcut (Geo)DataFrame.
    """
    sorted_df = df.sort_values(col, ascending=True)

    quantile_labels = pd.qcut(
        sorted_df[col],
        q=q,
        labels=range(q),
    )

    return sorted_df.assign(quant=quantile_labels)
1114
1115
1116
@timer_func
def allocate_pv(
    q_mastr_gdf: gpd.GeoDataFrame,
    q_buildings_gdf: gpd.GeoDataFrame,
    seed: int,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate the MaStR pv generators to the OSM buildings.
    This will determine a building for each pv generator if there are more
    buildings than generators within a given AGS. Primarily generators are
    distributed with the same quantile as the buildings. Multiple assignment
    is excluded.
    Parameters
    -----------
    q_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded and qcut MaStR data.
    q_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing qcut OSM buildings data.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    rng = default_rng(seed=seed)

    # Start with no assignments on either side; NaN marks "unassigned".
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan)
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan)

    ags_list = q_buildings_gdf.ags.unique()

    if TEST_RUN:
        # Only process a subset of municipalities in test runs.
        ags_list = ags_list[:250]

    num_ags = len(ags_list)

    t0 = perf_counter()

    for count, ags in enumerate(ags_list):

        # Unassigned buildings and generators within this municipality.
        buildings = q_buildings_gdf.loc[
            (q_buildings_gdf.ags == ags) & (q_buildings_gdf.gens_id.isna())
        ]
        gens = q_mastr_gdf.loc[
            (q_mastr_gdf.ags == ags) & (q_mastr_gdf.building_id.isna())
        ]

        len_build = len(buildings)
        len_gens = len(gens)

        if len_build < len_gens:
            # More generators than buildings: randomly drop surplus
            # generators so each remaining one can get a building.
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
            logger.error(
                f"There are {len_gens} generators and only {len_build}"
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
                "generators were truncated to match the amount of buildings."
            )

            assert len_build == len(gens)

        # NOTE(review): `buildings` is a snapshot taken before this loop;
        # assignments written to q_buildings_gdf below are not reflected in
        # it, so the gens_id.isna() filters inside operate on pre-loop
        # state. A building randomly added from another quantile could in
        # principle be drawn again in a later iteration — TODO confirm this
        # is intended / cannot occur in practice.
        for quant in gens.quant.unique():
            # Buildings and generators of the same capacity/area quantile.
            q_buildings = buildings.loc[
                (buildings.quant == quant) & (buildings.gens_id.isna())
            ]
            q_gens = gens.loc[gens.quant == quant]

            len_build = len(q_buildings)
            len_gens = len(q_gens)

            if len_build < len_gens:
                # Not enough buildings in this quantile: top up with random
                # unassigned buildings from the other quantiles.
                delta = len_gens - len_build

                logger.warning(
                    f"There are {len_gens} generators and only {len_build} "
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
                    f"buildings from AGS {ags} will be added randomly."
                )

                add_buildings = pd.Index(
                    rng.choice(
                        buildings.loc[
                            (buildings.quant != quant)
                            & (buildings.gens_id.isna())
                        ].index,
                        size=delta,
                        replace=False,
                    )
                )

                q_buildings = buildings.loc[
                    q_buildings.index.append(add_buildings)
                ]

                assert len(q_buildings) == len_gens

            # Randomly pick exactly one building per generator; sampling
            # without replacement prevents double assignment in this draw.
            chosen_buildings = pd.Index(
                rng.choice(
                    q_buildings.index,
                    size=len_gens,
                    replace=False,
                )
            )

            # q_mastr_gdf.loc[q_gens.index, "building_id"] = chosen_buildings
            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index

        if count % 100 == 0:
            # Progress logging every 100 municipalities.
            logger.debug(
                f"Allocation of {count / num_ags * 100:g} % of AGS done. It took "
                f"{perf_counter() - t0:g} seconds."
            )

            t0 = perf_counter()

    # Mirror the building -> generator assignment back onto the generators.
    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]

    q_mastr_gdf.loc[
        assigned_buildings.gens_id, "building_id"
    ] = assigned_buildings.index

    logger.debug("Allocated status quo generators to buildings.")

    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
1241
1242
1243
def frame_to_numeric(
    df: "pd.DataFrame | gpd.GeoDataFrame",
) -> "pd.DataFrame | gpd.GeoDataFrame":
    """
    Try to convert all columns of a DataFrame to numeric ignoring errors.

    Object-dtype columns (and an object-dtype index) that cannot be fully
    converted are left unchanged. The frame is modified in place and also
    returned.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
    """
    # pd.to_numeric(errors="ignore") is deprecated since pandas 2.2;
    # emulate its "return input unchanged on failure" behavior explicitly.
    if str(df.index.dtype) == "object":
        try:
            df.index = pd.to_numeric(df.index)
        except (ValueError, TypeError):
            pass

    for col in df.columns:
        if str(df[col].dtype) == "object":
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                pass

    return df
1263
1264
1265
def validate_output(
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
) -> None:
    """
    Validate output.

    * Validate that there are exactly as many buildings with a pv system as there are
      pv systems with a building
    * Validate that the building IDs with a pv system are the same building IDs as
      assigned to the pv systems
    * Validate that the pv system IDs with a building are the same pv system IDs as
      assigned to the buildings

    Parameters
    -----------
    desagg_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    desagg_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    allocated_gens = desagg_mastr_gdf.loc[
        ~desagg_mastr_gdf.building_id.isna()
    ]
    allocated_buildings = desagg_buildings_gdf.loc[
        ~desagg_buildings_gdf.gens_id.isna()
    ]

    # Counts of allocated generators and allocated buildings must match.
    assert len(allocated_gens) == len(allocated_buildings)

    # Building IDs referenced by generators == IDs of allocated buildings.
    assert (
        np.sort(allocated_gens.building_id.unique())
        == np.sort(allocated_buildings.index.unique())
    ).all()

    # Generator IDs referenced by buildings == IDs of allocated generators.
    assert (
        np.sort(allocated_gens.index.unique())
        == np.sort(allocated_buildings.gens_id.unique())
    ).all()

    logger.debug("Validated output.")
1315
1316
1317
def drop_unallocated_gens(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop generators which did not get allocated.

    A generator counts as unallocated when its building_id is NaN.

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with generators dropped which did not get
        allocated.
    """
    init_len = len(gdf)

    gdf = gdf.loc[gdf.building_id.notna()]

    dropped = init_len - len(gdf)

    logger.debug(
        f"Dropped {dropped} "
        f"({(dropped / init_len) * 100:g}%)"
        f" of {init_len} unallocated rows from MaStR DataFrame."
    )

    return gdf
1344
1345
1346
@timer_func
def allocate_to_buildings(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate status quo pv rooftop generators to buildings.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an AGS ID
        dropped.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    logger.debug("Starting allocation of status quo.")

    # Bin generators by capacity and buildings by area into Q quantiles.
    binned_mastr_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
    binned_buildings_gdf = sort_and_qcut_df(
        buildings_gdf, col="building_area", q=Q
    )

    # Match generators to buildings, preferring equal quantiles.
    allocated_mastr_gdf, allocated_buildings_gdf = allocate_pv(
        binned_mastr_gdf, binned_buildings_gdf, SEED
    )

    validate_output(allocated_mastr_gdf, allocated_buildings_gdf)

    return (
        drop_unallocated_gens(allocated_mastr_gdf),
        allocated_buildings_gdf,
    )
1378
1379
1380
@timer_func
def grid_districts(
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Load mv grid district geo data from eGo^n Database as
    geopandas.GeoDataFrame.

    Parameters
    -----------
    epsg : int
        EPSG ID to use as CRS.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    """
    sql = """
        SELECT bus_id, geom
        FROM grid.egon_mv_grid_district
        ORDER BY bus_id
        """

    mv_grid_gdf = db.select_geodataframe(
        sql,
        index_col="bus_id",
        geom_col="geom",
        epsg=epsg,
    )

    # Ensure an integer bus_id index.
    mv_grid_gdf.index = mv_grid_gdf.index.astype(int)

    logger.debug("Grid districts loaded.")

    return mv_grid_gdf
1412
1413
1414
def scenario_data(
    carrier: str = "solar_rooftop",
    scenario: str = "eGon2035",
) -> pd.DataFrame:
    """
    Get scenario capacity data from eGo^n Database.
    Parameters
    -----------
    carrier : str
        Carrier type to filter table by.
    scenario : str
        Scenario to filter table by.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with scenario capacity data in GW.
    """
    with db.session_scope() as session:
        query = session.query(EgonScenarioCapacities).filter(
            EgonScenarioCapacities.carrier == carrier,
            EgonScenarioCapacities.scenario_name == scenario,
        )

    # NOTE(review): the query is executed here, after the session scope has
    # been exited; pd.read_sql relies on query.session.bind still being
    # usable outside the `with` block — confirm this is intended.
    df = pd.read_sql(
        query.statement, query.session.bind, index_col="index"
    ).sort_index()

    logger.debug("Scenario capacity data loaded.")

    return df
1444
1445
1446 View Code Duplication
class Vg250Lan(Base):
    """
    SQLAlchemy ORM model for the ``boundaries.vg250_lan`` table.

    Holds VG250 administrative boundary records; queried in this module via
    ``federal_state_data`` for federal state shapes (id, nuts, geometry).
    """

    __tablename__ = "vg250_lan"
    __table_args__ = {"schema": "boundaries"}

    id = Column(BigInteger, primary_key=True, index=True)
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    # Geometry stored in the module-wide EPSG CRS.
    geometry = Column(Geometry(srid=EPSG), index=True)
1478
1479
1480
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
    """
    Get federal state data from eGo^n Database.
    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with federal state data.
    """
    with db.session_scope() as session:
        query = session.query(
            Vg250Lan.id, Vg250Lan.nuts, Vg250Lan.geometry.label("geom")
        )

        # Read while the session is open, then reproject to the target CRS.
        gdf = gpd.read_postgis(
            query.statement, session.connection(), index_col="id"
        ).to_crs(to_crs)

    logger.debug("Federal State data loaded.")

    return gdf
1504
1505
1506
@timer_func
def overlay_grid_districts_with_counties(
    mv_grid_district_gdf: gpd.GeoDataFrame,
    federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Calculate the intersections of mv grid districts and counties.

    Parameters
    -----------
    mv_grid_district_gdf : gpd.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    federal_state_gdf : gpd.GeoDataFrame
        GeoDataFrame with federal state data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    """
    logger.debug(
        "Calculating intersection overlay between mv grid districts and "
        "counties. This may take a while..."
    )

    # Bring the states into the grid districts' CRS before intersecting.
    states_gdf = federal_state_gdf.to_crs(mv_grid_district_gdf.crs)

    overlay_gdf = gpd.overlay(
        states_gdf,
        mv_grid_district_gdf.reset_index(),
        how="intersection",
        keep_geom_type=True,
    )

    logger.debug("Done!")

    return overlay_gdf
1539
1540
1541
@timer_func
def add_overlay_id_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    grid_federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about overlay ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    grid_federal_state_gdf : geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between counties and grid districts.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with overlay ID added.
    """
    # Reproject buildings into the overlay CRS, then spatially left-join
    # each building with the overlay shape it intersects.
    reprojected_gdf = buildings_gdf.to_crs(grid_federal_state_gdf.crs)

    joined_gdf = reprojected_gdf.sjoin(
        grid_federal_state_gdf,
        how="left",
        predicate="intersects",
    )

    result_gdf = joined_gdf.rename(columns={"index_right": "overlay_id"})

    logger.debug("Added overlay ID to OSM buildings.")

    return result_gdf
1572
1573
1574
def drop_buildings_outside_grids(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of grid areas.

    A building is considered outside when its bus ID is NaN.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    gepandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without a bus ID dropped.
    """
    within_gdf = buildings_gdf.loc[buildings_gdf.bus_id.notna()]

    n_total = len(buildings_gdf)
    n_dropped = n_total - len(within_gdf)

    logger.debug(
        f"{n_dropped} "
        f"({n_dropped / n_total * 100:g}%) "
        f"of {n_total} values are outside of the grid areas "
        "and are therefore dropped."
    )

    return within_gdf
1599
1600
1601
def cap_per_bus_id(
    scenario: str,
) -> pd.DataFrame:
    """
    Get table with total pv rooftop capacity per grid district.

    Reads the solar_rooftop generator targets table configured for the
    'solar_rooftop' dataset and filters by scenario name.

    Parameters
    -----------
    scenario : str
        Scenario name.
    Returns
    -------
    pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid, indexed by bus_id.
    """
    targets = config.datasets()["solar_rooftop"]["targets"]

    # NOTE: scenario is an internal scenario name interpolated into the SQL
    # string; it does not originate from untrusted user input.
    sql = f"""
    SELECT bus as bus_id, p_nom as capacity
    FROM {targets['generators']['schema']}.{targets['generators']['table']}
    WHERE carrier = 'solar_rooftop'
    AND scn_name = '{scenario}'
    """

    return db.select_dataframe(sql, index_col="bus_id")
1641
1642
1643
def determine_end_of_life_gens(
    mastr_gdf: gpd.GeoDataFrame,
    scenario_timestamp: pd.Timestamp,
    pv_rooftop_lifetime: pd.Timedelta,
) -> gpd.GeoDataFrame:
    """
    Determine if an old PV system has reached its end of life.

    Adds an ``age`` column (scenario timestamp minus start-up date) and an
    ``end_of_life`` flag (age exceeds the expected lifetime).

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    scenario_timestamp : pandas.Timestamp
        Timestamp at which the scenario takes place.
    pv_rooftop_lifetime : pandas.Timedelta
        Average expected lifetime of PV rooftop systems.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data and info if the system
        has reached its end of life.
    """
    total_cap = mastr_gdf.capacity.sum()

    mastr_gdf = mastr_gdf.assign(
        age=scenario_timestamp - mastr_gdf.start_up_date
    )
    mastr_gdf = mastr_gdf.assign(
        end_of_life=pv_rooftop_lifetime < mastr_gdf.age
    )

    active_cap = mastr_gdf.loc[~mastr_gdf.end_of_life].capacity.sum()

    logger.debug(
        "Determined if pv rooftop systems reached their end of life.\nTotal capacity: "
        f"{total_cap}\nActive capacity: {active_cap}"
    )

    return mastr_gdf
1682
1683
1684
def calculate_max_pv_cap_per_building(
    buildings_gdf: gpd.GeoDataFrame,
    mastr_gdf: gpd.GeoDataFrame,
    pv_cap_per_sq_m: float | int,
    roof_factor: float | int,
) -> gpd.GeoDataFrame:
    """
    Calculate the estimated maximum possible PV capacity per building.
    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    roof_factor : float, int
        Average for PV usable roof area share.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with estimated maximum PV
        capacity.
    """
    # NOTE(review): reset_index() only produces a column literally named
    # "index" when the index is unnamed; if buildings_gdf.index carries a
    # name, the rename below is a no-op and set_index("id") fails — confirm
    # the index is unnamed at this point.
    gdf = (
        buildings_gdf.reset_index().rename(columns={"index": "id"})
        .merge(
            mastr_gdf[
                [
                    "capacity",
                    "end_of_life",
                    "building_id",
                    "EinheitlicheAusrichtungUndNeigungswinkel",
                    "Hauptausrichtung",
                    "HauptausrichtungNeigungswinkel",
                ]
            ],
            how="left",
            left_on="id",
            right_on="building_id",
        )
        .set_index("id")
        .drop(columns="building_id")
    )

    # max_cap: building area scaled by usable roof share and capacity per
    # square meter. end_of_life: buildings without a matched generator get
    # NaN from the left merge and are filled with True here.
    return gdf.assign(
        max_cap=gdf.building_area.multiply(roof_factor * pv_cap_per_sq_m),
        end_of_life=gdf.end_of_life.fillna(True).astype(bool),
        bus_id=gdf.bus_id.astype(int),
    )
1734
1735
1736
def calculate_building_load_factor(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
    rounding: int = 4,
) -> gpd.GeoDataFrame:
    """
    Calculate the roof load factor from existing PV systems.

    The load factor is the installed capacity divided by the estimated
    maximum capacity of the assigned building, rounded to ``rounding``
    decimals.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    rounding : int
        Rounding to use for load factor.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data with calculated load factor.
    """
    # Only buildings with a known maximum capacity can yield a load factor.
    buildings_with_cap = buildings_gdf.loc[~buildings_gdf["max_cap"].isna()][
        ["max_cap", "building_area"]
    ]

    merged_gdf = mastr_gdf.merge(
        buildings_with_cap,
        how="left",
        left_on="building_id",
        right_index=True,
    )

    load_factor = (merged_gdf.capacity / merged_gdf.max_cap).round(rounding)

    return merged_gdf.assign(load_factor=load_factor)
1766
1767
1768
def get_probability_for_property(
1769
    mastr_gdf: gpd.GeoDataFrame,
1770
    cap_range: tuple[int | float, int | float],
1771
    prop: str,
1772
) -> tuple[np.array, np.array]:
1773
    """
1774
    Calculate the probability of the different options of a property of the
1775
    existing PV plants.
1776
    Parameters
1777
    -----------
1778
    mastr_gdf : geopandas.GeoDataFrame
1779
        GeoDataFrame containing geocoded MaStR data.
1780
    cap_range : tuple(int, int)
1781
        Capacity range of PV plants to look at.
1782
    prop : str
1783
        Property to calculate probabilities for. String needs to be in columns
1784
        of mastr_gdf.
1785
    Returns
1786
    -------
1787
    tuple
1788
        numpy.array
1789
            Unique values of property.
1790
        numpy.array
1791
            Probabilties per unique value.
1792
    """
1793
    cap_range_gdf = mastr_gdf.loc[
1794
        (mastr_gdf.capacity > cap_range[0])
1795
        & (mastr_gdf.capacity <= cap_range[1])
1796
    ]
1797
1798
    if prop == "load_factor":
1799
        cap_range_gdf = cap_range_gdf.loc[cap_range_gdf[prop] <= 1]
1800
1801
    count = Counter(
1802
        cap_range_gdf[prop].loc[
1803
            ~cap_range_gdf[prop].isna()
1804
            & ~cap_range_gdf[prop].isnull()
1805
            & ~(cap_range_gdf[prop] == "None")
1806
        ]
1807
    )
1808
1809
    values = np.array(list(count.keys()))
1810
    probabilities = np.fromiter(count.values(), dtype=float)
1811
    probabilities = probabilities / np.sum(probabilities)
1812
1813
    return values, probabilities
1814
1815
1816
@timer_func
def probabilities(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    properties: list[str] | None = None,
) -> dict:
    """
    Calculate the probability of the different options of properties of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    properties : list(str)
        List of properties to calculate probabilities for. Strings needs to be
        in columns of mastr_gdf.
    Returns
    -------
    dict
        Dictionary with values and probabilities per capacity range.
    """
    # Defaults are created per call to avoid shared mutable defaults.
    if cap_ranges is None:
        cap_ranges = [(0, 30), (30, 100), (100, float("inf"))]
    if properties is None:
        properties = [
            "EinheitlicheAusrichtungUndNeigungswinkel",
            "Hauptausrichtung",
            "HauptausrichtungNeigungswinkel",
            "load_factor",
        ]

    prob_dict = {}

    for cap_range in cap_ranges:
        range_values = {}
        range_probabilities = {}

        for prop in properties:
            values, probs = get_probability_for_property(
                mastr_gdf,
                cap_range,
                prop,
            )

            range_values[prop] = values
            range_probabilities[prop] = probs

        prob_dict[cap_range] = {
            "values": range_values,
            "probabilities": range_probabilities,
        }

    return prob_dict
1873
1874
1875
def cap_share_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the share of PV capacity from the total PV capacity within
    capacity ranges.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    """
    if cap_ranges is None:
        cap_ranges = [(0, 30), (30, 100), (100, float("inf"))]

    total_cap = mastr_gdf.capacity.sum()

    cap_share_dict = {}

    for lower, upper in cap_ranges:
        # Lower bound exclusive, upper bound inclusive.
        range_cap = mastr_gdf.loc[
            (mastr_gdf.capacity > lower) & (mastr_gdf.capacity <= upper)
        ].capacity.sum()

        cap_share_dict[(lower, upper)] = range_cap / total_cap

    return cap_share_dict
1918
1919
1920
def mean_load_factor_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the mean roof load factor per capacity range from existing PV
    plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.

    Returns
    -------
    dict
        Dictionary with mean roof load factor per capacity range.
    """
    if cap_ranges is None:
        # Default bins in kW: small, medium and large rooftop plants.
        cap_ranges = [
            (0, 30),
            (30, 100),
            (100, float("inf")),
        ]

    load_factor_dict = {}

    for low, high in cap_ranges:
        # Only plausible load factors (<= 1) enter the mean; larger values
        # would indicate more PV capacity than the roof area allows.
        in_range = (
            (mastr_gdf.load_factor <= 1)
            & (mastr_gdf.capacity > low)
            & (mastr_gdf.capacity <= high)
        )

        load_factor_dict[(low, high)] = mastr_gdf.loc[
            in_range
        ].load_factor.mean()

    return load_factor_dict
1958
1959
1960
def building_area_range_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    min_building_size: int | float = 10.0,
    upper_quantile: float = 0.95,
    lower_quantile: float = 0.05,
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
    """
    Estimate normal building area range per capacity range.

    The raw ranges are taken from quantiles of the building areas of existing
    plants per capacity range and afterwards blended with the neighbouring
    range so adjacent area ranges meet halfway.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    min_building_size : int, float
        Minimal building size to consider for PV plants.
    upper_quantile : float
        Upper quantile to estimate maximum building size per capacity range.
    lower_quantile : float
        Lower quantile to estimate minimum building size per capacity range.

    Returns
    -------
    dict
        Dictionary with estimated normal building area range per capacity
        range.
    """
    if cap_ranges is None:
        # Default bins in kW: small, medium and large rooftop plants.
        cap_ranges = [
            (0, 30),
            (30, 100),
            (100, float("inf")),
        ]

    last = len(cap_ranges) - 1

    raw_ranges = {}

    # Step 1: raw building area range per capacity range from quantiles of
    # the areas observed for existing plants in that range.
    for idx, (cap_low, cap_high) in enumerate(cap_ranges):
        areas = mastr_gdf.loc[
            (mastr_gdf.capacity > cap_low)
            & (mastr_gdf.capacity <= cap_high)
        ].building_area

        if idx == 0:
            # First range is open towards small areas down to the minimum.
            bounds = (min_building_size, areas.quantile(upper_quantile))
        elif idx == last:
            # Last range is open towards large areas.
            bounds = (areas.quantile(lower_quantile), float("inf"))
        else:
            bounds = (
                areas.quantile(lower_quantile),
                areas.quantile(upper_quantile),
            )

        raw_ranges[(cap_low, cap_high)] = bounds

    bounds_list = list(raw_ranges.values())

    normed_ranges = {}

    # Step 2: blend each inner boundary with the adjacent range's boundary
    # (arithmetic mean) so the area ranges neither overlap nor leave gaps.
    for idx, (cap_range, (lower, upper)) in enumerate(raw_ranges.items()):
        if idx == 0:
            normed_ranges[cap_range] = (
                lower,
                np.mean((bounds_list[idx + 1][0], upper)),
            )
        elif idx == last:
            normed_ranges[cap_range] = (
                np.mean((bounds_list[idx - 1][1], lower)),
                upper,
            )
        else:
            normed_ranges[cap_range] = (
                np.mean((bounds_list[idx - 1][1], lower)),
                np.mean((bounds_list[idx + 1][0], upper)),
            )

    return normed_ranges
2047
2048
2049
@timer_func
def desaggregate_pv_in_mv_grid(
    buildings_gdf: gpd.GeoDataFrame,
    pv_cap: float | int,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing buildings within the grid district.
    pv_cap : float, int
        PV capacity to desaggregate.

    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    bus_id = int(buildings_gdf.bus_id.iat[0])

    rng = default_rng(seed=kwargs["seed"])
    random_state = RandomState(seed=kwargs["seed"])

    results_df = pd.DataFrame(columns=buildings_gdf.columns)

    for cap_range, share in kwargs["cap_share_dict"].items():
        # Capacity to allocate within this capacity range.
        pv_cap_range = pv_cap * share

        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]

        # Candidate buildings: not already used and with a building area
        # typical for this capacity range.
        cap_range_buildings_gdf = buildings_gdf.loc[
            ~buildings_gdf.index.isin(results_df.index)
            & (buildings_gdf.building_area > b_area_min)
            & (buildings_gdf.building_area <= b_area_max)
        ]

        mean_load_factor = kwargs["load_factor_dict"][cap_range]
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
            load_factor=np.nan,
            capacity=np.nan,
        )

        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        if total_mean_cap == 0:
            logger.warning(
                f"There are no matching roof for capacity range {cap_range} "
                f"kW in grid {bus_id}. Using all buildings as fallback."
            )

            # Fallback 1: drop the building area restriction.
            cap_range_buildings_gdf = buildings_gdf.loc[
                ~buildings_gdf.index.isin(results_df.index)
            ]

            if len(cap_range_buildings_gdf) == 0:
                logger.warning(
                    "There are no roofes available for capacity range "
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
                )
                # Fallback 2: allow buildings to host generators from
                # several capacity ranges.
                cap_range_buildings_gdf = buildings_gdf.copy()

            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
                load_factor=np.nan,
                capacity=np.nan,
            )

            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        elif total_mean_cap < pv_cap_range:
            logger.warning(
                f"Average roof utilization of the roof area in grid {bus_id} "
                f"and capacity range {cap_range} kW is not sufficient. The "
                "roof utilization will be above average."
            )

        # Sample at least one building and at most all candidates.
        frac = max(
            pv_cap_range / total_mean_cap,
            1 / len(cap_range_buildings_gdf),
        )

        samples_gdf = cap_range_buildings_gdf.sample(
            frac=min(1, frac),
            random_state=random_state,
        )

        cap_range_dict = kwargs["prob_dict"][cap_range]

        values_dict = cap_range_dict["values"]
        p_dict = cap_range_dict["probabilities"]

        # Draw one load factor per sampled building from the empirical
        # distribution of existing plants in this capacity range.
        load_factors = rng.choice(
            a=values_dict["load_factor"],
            size=len(samples_gdf),
            p=p_dict["load_factor"],
        )

        samples_gdf = samples_gdf.assign(
            load_factor=load_factors,
            capacity=(
                samples_gdf.building_area
                * load_factors
                * kwargs["pv_cap_per_sq_m"]
            ).clip(lower=0.4),  # enforce a minimal plant size of 0.4 kW
        )

        # Rescale so the capacity target of this range is met exactly.
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()

        samples_gdf = samples_gdf.assign(
            capacity=(samples_gdf.capacity * missing_factor),
            load_factor=(samples_gdf.load_factor * missing_factor),
        )

        assert np.isclose(
            samples_gdf.capacity.sum(),
            pv_cap_range,
            rtol=1e-03,
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"

        results_df = pd.concat(
            [
                results_df,
                samples_gdf,
            ],
        )

    # Final rescaling so the overall grid target is met exactly.
    total_missing_factor = pv_cap / results_df.capacity.sum()

    results_df = results_df.assign(
        capacity=(results_df.capacity * total_missing_factor),
    )

    assert np.isclose(
        results_df.capacity.sum(),
        pv_cap,
        rtol=1e-03,
    ), f"{results_df.capacity.sum()} != {pv_cap}"

    # Bugfix: use the CRS of the (always defined) input frame. The previous
    # ``samples_gdf.crs`` is undefined when ``cap_share_dict`` is empty, and
    # samples are drawn from ``buildings_gdf`` so the CRS is the same.
    return gpd.GeoDataFrame(
        results_df,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
2211
2212
2213
@timer_func
def desaggregate_pv(
    buildings_gdf: gpd.GeoDataFrame,
    cap_df: pd.DataFrame,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    cap_df : pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid.

    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    # Start with the still-living generators; new ones are appended per grid.
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]

    building_bus_ids = set(buildings_gdf.bus_id)
    cap_bus_ids = set(cap_df.index)

    logger.debug(
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from capacity: "
        f"{len(cap_bus_ids)}"
    )

    # Log the asymmetric difference between both bus ID sets for diagnosis.
    if len(building_bus_ids) > len(cap_bus_ids):
        missing = building_bus_ids - cap_bus_ids
    else:
        missing = cap_bus_ids - building_bus_ids

    logger.debug(str(missing))

    # assert set(buildings_gdf.bus_id.unique()) == set(cap_df.index)

    for bus_id in buildings_gdf.bus_id.unique():
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]

        pv_installed_gdf = buildings_grid_gdf.loc[
            ~buildings_grid_gdf.end_of_life
        ]

        pv_installed = pv_installed_gdf.capacity.sum()

        # Buildings without a living generator are potential hosts.
        pot_buildings_gdf = buildings_grid_gdf.drop(
            index=pv_installed_gdf.index
        )

        if len(pot_buildings_gdf) == 0:
            logger.error(
                f"In grid {bus_id} there are no potential buildings to allocate "
                "PV capacity to. The grid is skipped. This message should only "
                "appear doing test runs with few buildings."
            )

            continue

        # Scenario target is given in MW; buildings carry kW.
        pv_target = cap_df.at[bus_id, "capacity"] * 1000

        logger.debug(f"pv_target: {pv_target}")

        pv_missing = pv_target - pv_installed

        if pv_missing <= 0:
            logger.info(
                f"In grid {bus_id} there is more PV installed ({pv_installed: g}) in "
                f"status Quo than allocated within the scenario ({pv_target: g}). No "
                f"new generators are added."
            )

            continue

        if pot_buildings_gdf.max_cap.sum() < pv_missing:
            logger.error(
                f"In grid {bus_id} there is less PV potential ("
                f"{pot_buildings_gdf.max_cap.sum():g} kW) than allocated PV  capacity "
                f"({pv_missing:g} kW). The average roof utilization will be very high."
            )

        gdf = desaggregate_pv_in_mv_grid(
            buildings_gdf=pot_buildings_gdf,
            pv_cap=pv_missing,
            **kwargs,
        )

        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
        logger.debug(
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
        )

        if not np.isclose(
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
        ):
            logger.warning(
                f"The desired capacity and actual capacity in grid {bus_id} differ.\n"
                f"Desired cap: {pv_target}\nActual cap: "
                f"{gdf.capacity.sum() + pv_installed}"
            )

        allocated_buildings_gdf = pd.concat(
            [
                allocated_buildings_gdf,
                gdf,
            ]
        )

    logger.debug("Desaggregated scenario.")
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
    logger.debug(
        f"Generator capacity: {allocated_buildings_gdf.capacity.sum(): g}"
    )

    # Bugfix: use the CRS of the (always defined) input frame. The previous
    # ``gdf.crs`` is undefined when every grid iteration was skipped via
    # ``continue``; per-grid frames inherit their CRS from ``buildings_gdf``.
    return gpd.GeoDataFrame(
        allocated_buildings_gdf,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
2351
2352
2353
@timer_func
def add_buildings_meta_data(
    buildings_gdf: gpd.GeoDataFrame,
    prob_dict: dict,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Randomly add additional metadata to desaggregated PV plants.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    rng = default_rng(seed=seed)
    buildings_gdf = buildings_gdf.reset_index().rename(
        columns={
            "index": "building_id",
        }
    )

    for (min_cap, max_cap), cap_range_prob_dict in prob_dict.items():
        cap_range_gdf = buildings_gdf.loc[
            (buildings_gdf.capacity >= min_cap)
            & (buildings_gdf.capacity < max_cap)
        ]

        for key, values in cap_range_prob_dict["values"].items():
            if key == "load_factor":
                # load_factor is already assigned during desaggregation.
                continue

            # Rows with missing metadata for this property. Note:
            # ``Series.isnull`` is a documented alias of ``Series.isna``,
            # so a single isna() check suffices (the former code redundantly
            # or-ed both).
            gdf = cap_range_gdf.loc[
                cap_range_gdf[key].isna()
                | (cap_range_gdf[key] == "None")
            ]

            # Fill gaps by sampling from the empirical distribution of
            # existing plants within this capacity range.
            key_vals = rng.choice(
                a=values,
                size=len(gdf),
                p=cap_range_prob_dict["probabilities"][key],
            )

            buildings_gdf.loc[gdf.index, key] = key_vals

    return buildings_gdf
2408
2409
2410
def add_voltage_level(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add voltage level derived from generator capacity to the power plants.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with voltage level per generator.
    """

    def _level_for(cap: float) -> int:
        # (exclusive upper capacity bound in kW, voltage level),
        # checked in ascending order; anything above the last bound is
        # assigned level 1.
        thresholds = (
            (100, 7),
            (200, 6),
            (5500, 5),
            (20000, 4),
            (120000, 3),
        )

        for bound, level in thresholds:
            if cap < bound:
                return level

        return 1

    return buildings_gdf.assign(
        voltage_level=buildings_gdf.capacity.apply(_level_for)
    )
2442
2443
2444
def add_start_up_date(
    buildings_gdf: gpd.GeoDataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    seed: int,
):
    """
    Randomly and linear add start-up date to new pv generators.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    start : pandas.Timestamp
        Minimum Timestamp to use.
    end : pandas.Timestamp
        Maximum Timestamp to use.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with start-up date added.
    """
    rng = default_rng(seed=seed)

    # Daily resolution between both bounds; every generator draws its
    # commissioning day uniformly from this range.
    possible_dates = pd.date_range(start=start, end=end, freq="1D")

    return buildings_gdf.assign(
        start_up_date=rng.choice(possible_dates, size=len(buildings_gdf))
    )
2475
2476
2477
@timer_func
def allocate_scenarios(
    mastr_gdf: gpd.GeoDataFrame,
    valid_buildings_gdf: gpd.GeoDataFrame,
    last_scenario_gdf: gpd.GeoDataFrame,
    scenario: str,
):
    """
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.

    Parameters
    ----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    valid_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    last_scenario_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings matched with pv generators from temporal
        preceding scenario.
    scenario : str
        Scenario to desaggrgate and allocate.

    Returns
    -------
    tuple
        geopandas.GeoDataFrame
            GeoDataFrame containing OSM buildings matched with pv generators.
        pandas.DataFrame
            DataFrame containing pv rooftop capacity per grid id.
    """
    cap_per_bus_id_df = cap_per_bus_id(scenario)

    logger.debug(
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
    )

    # Retire generators whose lifetime ends before the scenario year.
    last_scenario_gdf = determine_end_of_life_gens(
        last_scenario_gdf,
        SCENARIO_TIMESTAMP[scenario],
        PV_ROOFTOP_LIFETIME,
    )

    # Remaining PV potential per building after accounting for the plants
    # of the preceding scenario.
    buildings_gdf = calculate_max_pv_cap_per_building(
        valid_buildings_gdf,
        last_scenario_gdf,
        PV_CAP_PER_SQ_M,
        ROOF_FACTOR,
    )

    mastr_gdf = calculate_building_load_factor(mastr_gdf, buildings_gdf)

    # Statistics derived from existing MaStR plants which steer the random
    # desaggregation below.
    probabilities_dict = probabilities(mastr_gdf, cap_ranges=CAP_RANGES)

    cap_share_dict = cap_share_per_cap_range(mastr_gdf, cap_ranges=CAP_RANGES)

    load_factor_dict = mean_load_factor_per_cap_range(
        mastr_gdf, cap_ranges=CAP_RANGES
    )

    building_area_range_dict = building_area_range_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
        min_building_size=MIN_BUILDING_SIZE,
        upper_quantile=UPPER_QUNATILE,
        lower_quantile=LOWER_QUANTILE,
    )

    # Distribute the scenario targets onto buildings and tag the result
    # with the scenario name.
    allocated_buildings_gdf = desaggregate_pv(
        buildings_gdf=buildings_gdf,
        cap_df=cap_per_bus_id_df,
        prob_dict=probabilities_dict,
        cap_share_dict=cap_share_dict,
        building_area_range_dict=building_area_range_dict,
        load_factor_dict=load_factor_dict,
        seed=SEED,
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
    ).assign(scenario=scenario)

    meta_buildings_gdf = frame_to_numeric(
        add_buildings_meta_data(
            allocated_buildings_gdf,
            probabilities_dict,
            SEED,
        )
    )

    # New generators start up between the latest preceding start-up date
    # and the scenario timestamp.
    scenario_buildings_gdf = add_start_up_date(
        meta_buildings_gdf,
        start=last_scenario_gdf.start_up_date.max(),
        end=SCENARIO_TIMESTAMP[scenario],
        seed=SEED,
    )

    return scenario_buildings_gdf, cap_per_bus_id_df
2582
2583
2584
class EgonPowerPlantPvRoofBuildingScenario(Base):
    """
    Declarative ORM model mapping PV rooftop generators to buildings per
    scenario (target table ``supply.egon_power_plants_pv_roof_building``).
    """

    __tablename__ = "egon_power_plants_pv_roof_building"
    __table_args__ = {"schema": "supply"}

    # Surrogate primary key.
    index = Column(Integer, primary_key=True, index=True)
    scenario = Column(String)
    building_id = Column(Integer)
    # NOTE(review): presumably the MaStR unit identifier, nullable for
    # newly generated (non-MaStR) plants — confirm against the exporter.
    gens_id = Column(String, nullable=True)
    capacity = Column(Float)
    einheitliche_ausrichtung_und_neigungswinkel = Column(Float)
    hauptausrichtung = Column(String)
    hauptausrichtung_neigungswinkel = Column(String)
    # Voltage level derived from generator capacity (see add_voltage_level).
    voltage_level = Column(Integer)
2598
2599
def create_scenario_table(buildings_gdf):
    """Create mapping table pv_unit <-> building for scenario"""
    table = EgonPowerPlantPvRoofBuildingScenario.__table__

    # Recreate the table from scratch so reruns do not append duplicates.
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    export_df = buildings_gdf.rename(columns=COLS_TO_RENAME).assign(
        capacity=buildings_gdf.capacity.div(10**3)  # kW -> MW
    )

    export_df[COLS_TO_EXPORT].reset_index().to_sql(
        name=table.name,
        schema=table.schema,
        con=db.engine(),
        if_exists="append",
        index=False,
    )
2617
2618
2619
def geocode_mastr_data():
    """
    Read PV rooftop data from MaStR CSV.

    TODO: the source will be replaced as soon as the MaStR data is available
    in DB.
    """
    # Load the raw MaStR table with the configured columns and dtypes.
    raw_mastr_df = mastr_data(
        MASTR_INDEX_COL,
        MASTR_RELEVANT_COLS,
        MASTR_DTYPES,
        MASTR_PARSE_DATES,
    )

    # Drop implausible entries and fill missing values where possible.
    cleaned_mastr_df = clean_mastr_data(
        raw_mastr_df,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
        rounding=ROUNDING,
    )

    geocoding_df = geocoding_data(cleaned_mastr_df)

    # Rate-limited Nominatim geocoder to respect the usage policy.
    rate_limited_geocoder = geocoder(USER_AGENT, MIN_DELAY_SECONDS)

    geocoded_gdf = geocode_data(geocoding_df, rate_limited_geocoder, EPSG)

    create_geocoded_table(geocoded_gdf)
2647
2648
2649
def pv_rooftop_to_buildings():
    """
    Main script, executed as task.

    Loads MaStR and building data, allocates the status-quo generators to
    buildings, then desaggregates each scenario on top of the preceding
    one and exports the combined result to the database.
    """

    mastr_gdf = load_mastr_data()

    buildings_gdf = load_building_data()

    desagg_mastr_gdf, desagg_buildings_gdf = allocate_to_buildings(
        mastr_gdf, buildings_gdf
    )

    # Status-quo generators form the base; scenario results are appended.
    all_buildings_gdf = (
        desagg_mastr_gdf.assign(scenario="status_quo")
        .reset_index()
        .rename(columns={"geometry": "geom", "EinheitMastrNummer": "gens_id"})
    )

    scenario_buildings_gdf = all_buildings_gdf.copy()

    cap_per_bus_id_df = pd.DataFrame()

    for scenario in SCENARIOS:
        logger.debug(f"Desaggregating scenario {scenario}.")
        # Each scenario builds on the allocation of the previous one:
        # scenario_buildings_gdf is carried over between iterations.
        (
            scenario_buildings_gdf,
            cap_per_bus_id_scenario_df,
        ) = allocate_scenarios(  # noqa: F841
            desagg_mastr_gdf,
            desagg_buildings_gdf,
            scenario_buildings_gdf,
            scenario,
        )

        all_buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [all_buildings_gdf, scenario_buildings_gdf], ignore_index=True
            ),
            crs=scenario_buildings_gdf.crs,
            geometry="geom",
        )

        # Accumulate the per-scenario capacity targets for reference.
        cap_per_bus_id_df = pd.concat(
            [cap_per_bus_id_df, cap_per_bus_id_scenario_df]
        )

    # export scenario
    create_scenario_table(add_voltage_level(all_buildings_gdf))
2696