Passed
Pull Request — dev (#1008)
by
unknown
01:39
created

federal_state_data()   A

Complexity

Conditions 2

Size

Total Lines 24
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 24
rs 9.95
c 0
b 0
f 0
cc 2
nop 1
1
"""
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate new
PV rooftop generators for scenarios eGon2035 and eGon100RE.

Data cleaning: Drop duplicates and entries with missing critical data. Determine the
most plausible capacity from multiple values given in MaStR data. Drop generators
which don't have any plausible capacity data (23.5 MW > P > 0.1 kW). Randomly and
weighted add a start-up date if it is missing. Extract zip and municipality from the
'Standort' given in MaStR data. Geocode unique zip and municipality combinations
with Nominatim (1 s delay). Drop generators for which geocoding failed or which are
located outside the municipalities of Germany. Add some visual sanity checks for
cleaned data.

Allocation of MaStR data: Allocate each generator to an existing building from OSM.
Determine the quantile each generator and building is in depending on the capacity
of the generator and the area of the polygon of the building. Randomly distribute
generators within each municipality, preferably within the same building area
quantile as the generators are capacity-wise. If not enough buildings exist within
a municipality and quantile, additional buildings from other quantiles are chosen
randomly.

Disaggregation of PV rooftop scenarios: The scenario data per federal state is
linearly distributed to the MV grid districts according to the PV rooftop potential
per MV grid district. The rooftop potential is estimated from the building area
given by the OSM buildings. Grid districts which are located in several federal
states are allocated PV capacity according to their respective roof potential in
the individual federal states. The disaggregation of PV plants within a grid
district respects existing plants from MaStR which did not reach their end of life.
New PV plants are randomly and weighted generated using a breakdown of MaStR data
as generator basis. Plant metadata (e.g. plant orientation) is also added randomly
and weighted from MaStR data as basis.
"""
27
from __future__ import annotations
28
29
from collections import Counter
30
from functools import wraps
31
from pathlib import Path
32
from time import perf_counter
33
from typing import Any
34
35
from geoalchemy2 import Geometry
36
from geopy.extra.rate_limiter import RateLimiter
37
from geopy.geocoders import Nominatim
38
from loguru import logger
39
from numpy.random import RandomState, default_rng
40
from pyproj.crs.crs import CRS
41
from sqlalchemy import BigInteger, Column, Float, Integer, String
42
from sqlalchemy.dialects.postgresql import HSTORE
43
from sqlalchemy.ext.declarative import declarative_base
44
import geopandas as gpd
45
import numpy as np
46
import pandas as pd
47
48
from egon.data import config, db
49
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
50
    OsmBuildingsSynthetic,
51
)
52
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
53
from egon.data.datasets.zensus_vg250 import Vg250Gem
54
55
engine = db.engine()
Base = declarative_base()
# Global random seed taken from the egon-data CLI configuration.
SEED = int(config.settings()["egon-data"]["--random-seed"])

# TODO: move to yml
# mastr data
# Columns to read from the MaStR CSV export.
# NOTE(review): "StatisikFlag" is spelled like this in the MaStR source data
# itself and must be kept as is.
MASTR_RELEVANT_COLS = [
    "EinheitMastrNummer",
    "Bruttoleistung",
    "StatisikFlag",
    "Bruttoleistung_extended",
    "Nettonennleistung",
    "InstallierteLeistung",
    "zugeordneteWirkleistungWechselrichter",
    "EinheitBetriebsstatus",
    "Standort",
    "Bundesland",
    "Land",
    "Landkreis",
    "Gemeinde",
    "Postleitzahl",
    "Ort",
    "GeplantesInbetriebnahmedatum",
    "Inbetriebnahmedatum",
    "GemeinsamerWechselrichterMitSpeicher",
    "Lage",
    "Leistungsbegrenzung",
    "EinheitlicheAusrichtungUndNeigungswinkel",
    "Hauptausrichtung",
    "HauptausrichtungNeigungswinkel",
    "Nebenausrichtung",
]

# Explicit dtypes for the CSV read.
# NOTE(review): "NebenausrichtungNeigungswinkel" has a dtype entry but is not
# part of MASTR_RELEVANT_COLS (usecols) -- confirm it is intentionally unread.
MASTR_DTYPES = {
    "EinheitMastrNummer": str,
    "Bruttoleistung": float,
    "StatisikFlag": str,
    "Bruttoleistung_extended": float,
    "Nettonennleistung": float,
    "InstallierteLeistung": float,
    "zugeordneteWirkleistungWechselrichter": float,
    "EinheitBetriebsstatus": str,
    "Standort": str,
    "Bundesland": str,
    "Land": str,
    "Landkreis": str,
    "Gemeinde": str,
    # "Postleitzahl": int,  # fails because of nan values
    "Ort": str,
    "GemeinsamerWechselrichterMitSpeicher": str,
    "Lage": str,
    "Leistungsbegrenzung": str,
    # this will parse nan values as false wich is not always correct
    # "EinheitlicheAusrichtungUndNeigungswinkel": bool,
    "Hauptausrichtung": str,
    "HauptausrichtungNeigungswinkel": str,
    "Nebenausrichtung": str,
    "NebenausrichtungNeigungswinkel": str,
}

MASTR_PARSE_DATES = [
    "GeplantesInbetriebnahmedatum",
    "Inbetriebnahmedatum",
]

MASTR_INDEX_COL = "EinheitMastrNummer"

EPSG = 4326  # WGS 84, used for geocoding results
SRID = 3035  # ETRS89 / LAEA Europe, used for building geometries

# data cleaning
MAX_REALISTIC_PV_CAP = 23500  # kW, see comment in clean_mastr_data
MIN_REALISTIC_PV_CAP = 0.1  # kW
ROUNDING = 1  # decimal places used when rounding capacity values

# geopy
MIN_DELAY_SECONDS = 1
USER_AGENT = "rli_kh_geocoder"

# show additional logging information
VERBOSE = False

# Bug fix: use __file__ (this module's path) instead of __name__ (the module
# name), which would resolve relative to the current working directory.
EXPORT_DIR = Path(__file__).resolve().parent / "data"
EXPORT_FILE = "mastr_geocoded.gpkg"
EXPORT_PATH = EXPORT_DIR / EXPORT_FILE
DRIVER = "GPKG"

# Number of quantiles
Q = 5

# Scenario Data
CARRIER = "solar_rooftop"
SCENARIOS = ["eGon2035", "eGon100RE"]
SCENARIO_TIMESTAMP = {
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
}
# NOTE(review): 20 * 365 days ignores leap days -- confirm this is acceptable.
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")

# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
MODUL_CAP = 0.4  # kWp
MODUL_SIZE = 1.096 * 1.754  # m²
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE

# Estimation of usable roof area
# Factor for the conversion of building area to roof area
# estimation mean roof pitch: 35°
# estimation usable roof share: 80%
# estimation that only the south side of the building is used for pv
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
# AREA_FACTOR = 1.221
# USABLE_ROOF_SHARE = 0.8
# SOUTH_SHARE = 0.5
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
ROOF_FACTOR = 0.5

# Capacity ranges (kW) used to segment generators.
CAP_RANGES = [
    (0, 30),
    (30, 100),
    (100, float("inf")),
]

MIN_BUILDING_SIZE = 10.0
# NOTE(review): name is misspelled ("QUNATILE") but kept as is to avoid
# breaking references elsewhere in this module.
UPPER_QUNATILE = 0.95
LOWER_QUANTILE = 0.05

COLS_TO_RENAME = {
    "EinheitlicheAusrichtungUndNeigungswinkel": (
        "einheitliche_ausrichtung_und_neigungswinkel"
    ),
    "Hauptausrichtung": "hauptausrichtung",
    "HauptausrichtungNeigungswinkel": "hauptausrichtung_neigungswinkel",
}

COLS_TO_EXPORT = [
    "scenario",
    "bus_id",
    "building_id",
    "gens_id",
    "capacity",
    "einheitliche_ausrichtung_und_neigungswinkel",
    "hauptausrichtung",
    "hauptausrichtung_neigungswinkel",
    "voltage_level",
]

# TODO
INCLUDE_SYNTHETIC_BUILDINGS = True
ONLY_BUILDINGS_WITH_DEMAND = True
TEST_RUN = False
206
207
208
def timer_func(func):
    """Decorator that logs the wall-clock runtime of *func* via loguru."""

    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        # Measure only the wrapped call itself.
        tic = perf_counter()
        result = func(*args, **kwargs)
        elapsed = perf_counter() - tic

        logger.debug(
            f"Function {func.__name__} took {elapsed:.4f} seconds."
        )

        return result

    return timeit_wrapper
221
222
223
@timer_func
def mastr_data(
    index_col: str | int | list[str] | list[int],
    usecols: list[str],
    dtype: dict[str, Any] | None,
    parse_dates: list[str] | None,
) -> pd.DataFrame:
    """
    Read MaStR data from csv.

    Parameters
    ----------
    index_col : str, int or list of str or int
        Column(s) to use as the row labels of the DataFrame.
    usecols : list of str
        Return a subset of the columns.
    dtype : dict of column (str) -> type (any), optional
        Data type for data or columns.
    parse_dates : list of names (str), optional
        Try to parse given columns to datetime.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing MaStR data.
    """
    # CSV location comes from the power plants dataset configuration.
    mastr_path = Path(
        config.datasets()["power_plants"]["sources"]["mastr_pv"]
    ).resolve()

    mastr_df = pd.read_csv(
        mastr_path,
        index_col=index_col,
        usecols=usecols,
        dtype=dtype,
        parse_dates=parse_dates,
    )

    # Keep only units that are in operation, located in Germany and mounted
    # on buildings ("BaulicheAnlagen").
    # NOTE(review): "StatisikFlag" is spelled like this in the MaStR source
    # data; "B" presumably selects one registration category -- confirm
    # against the MaStR documentation.
    mastr_df = mastr_df.loc[
        (mastr_df.StatisikFlag == "B")
        & (mastr_df.EinheitBetriebsstatus == "InBetrieb")
        & (mastr_df.Land == "Deutschland")
        & (mastr_df.Lage == "BaulicheAnlagen")
    ]

    # Test mode: restrict the data to Schleswig-Holstein if configured.
    if (
        config.settings()["egon-data"]["--dataset-boundary"]
        == "Schleswig-Holstein"
    ):
        init_len = len(mastr_df)

        mastr_df = mastr_df.loc[mastr_df.Bundesland == "SchleswigHolstein"]

        logger.info(
            f"Using only MaStR data within Schleswig-Holstein. "
            f"{init_len - len(mastr_df)} of {init_len} generators are dropped."
        )

    logger.debug("MaStR data loaded.")

    return mastr_df
283
284
285
@timer_func
def clean_mastr_data(
    mastr_df: pd.DataFrame,
    max_realistic_pv_cap: int | float,
    min_realistic_pv_cap: int | float,
    rounding: int,
    seed: int,
) -> pd.DataFrame:
    """
    Clean the MaStR data from implausible data.

    * Drop MaStR ID duplicates.
    * Drop generators with implausible capacities.
    * Drop generators without any kind of start-up date.
    * Clean up Standort column and capacity.

    Parameters
    ----------
    mastr_df : pandas.DataFrame
        DataFrame containing MaStR data.
    max_realistic_pv_cap : int or float
        Maximum capacity, which is considered to be realistic.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.
    rounding : int
        Rounding to use when cleaning up capacity. E.g. when
        rounding is 1 a capacity of 9.93 will be rounded to 9.9.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    """
    init_len = len(mastr_df)

    # drop duplicates
    mastr_df = mastr_df.loc[~mastr_df.index.duplicated()]

    # drop invalid entries in standort
    # NOTE(review): isna() and isnull() are aliases in pandas, so the second
    # condition is redundant but harmless.
    index_to_drop = mastr_df.loc[
        (mastr_df.Standort.isna()) | (mastr_df.Standort.isnull())
    ].index

    mastr_df = mastr_df.loc[~mastr_df.index.isin(index_to_drop)]

    # Round all candidate capacity columns before choosing one of them.
    df = mastr_df[
        [
            "Bruttoleistung",
            "Bruttoleistung_extended",
            "Nettonennleistung",
            "zugeordneteWirkleistungWechselrichter",
            "InstallierteLeistung",
        ]
    ].round(rounding)

    # use only the smallest capacity rating if multiple are given
    # (see most_plausible: falls back to the maximum if the minimum is
    # below min_realistic_pv_cap)
    mastr_df = mastr_df.assign(
        capacity=[
            most_plausible(p_tub, min_realistic_pv_cap)
            for p_tub in df.itertuples(index=False)
        ]
    )

    # drop generators without any capacity info
    # and capacity of zero
    # and if the capacity is > 23.5 MW, because
    # Germanies largest rooftop PV is 23 MW
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
    mastr_df = mastr_df.loc[
        (~mastr_df.capacity.isna())
        & (mastr_df.capacity <= max_realistic_pv_cap)
        & (mastr_df.capacity > min_realistic_pv_cap)
    ]

    # get zip and municipality
    # zip_and_municipality_from_standort returns a (string, success) tuple;
    # the success flag lands in the temporary "drop_this" column.
    mastr_df[["zip_and_municipality", "drop_this"]] = pd.DataFrame(
        mastr_df.Standort.astype(str)
        .apply(
            zip_and_municipality_from_standort,
            args=(VERBOSE,),
        )
        .tolist(),
        index=mastr_df.index,
    )

    # drop invalid entries
    mastr_df = mastr_df.loc[mastr_df.drop_this].drop(columns="drop_this")

    # add ", Deutschland" just in case
    mastr_df = mastr_df.assign(
        zip_and_municipality=(mastr_df.zip_and_municipality + ", Deutschland")
    )

    # get consistent start-up date
    mastr_df = mastr_df.assign(
        start_up_date=mastr_df.Inbetriebnahmedatum,
    )

    # Fall back to the planned start-up date where the actual one is missing.
    # NOTE(review): this row-wise .loc assignment replaces whole rows; it
    # relies on the assigned frame having identical columns -- confirm.
    mastr_df.loc[mastr_df.start_up_date.isna()] = mastr_df.loc[
        mastr_df.start_up_date.isna()
    ].assign(
        start_up_date=mastr_df.GeplantesInbetriebnahmedatum.loc[
            mastr_df.start_up_date.isna()
        ]
    )

    # randomly and weighted fill missing start-up dates
    # NOTE(review): np.random.choice is called without p= and with
    # replace=False, so this samples uniformly (the pool's value frequencies
    # provide the implicit weighting) and requires size <= len(pool) --
    # confirm this matches the intent stated in the log message below.
    pool = mastr_df.loc[
        ~mastr_df.start_up_date.isna()
    ].start_up_date.to_numpy()

    size = len(mastr_df) - len(pool)

    if size > 0:
        np.random.seed(seed)

        choice = np.random.choice(
            pool,
            size=size,
            replace=False,
        )

        mastr_df.loc[mastr_df.start_up_date.isna()] = mastr_df.loc[
            mastr_df.start_up_date.isna()
        ].assign(start_up_date=choice)

        logger.info(
            f"Randomly and weigthed added start-up date to {size} generators."
        )

    # Normalize all start-up dates to timezone-aware UTC timestamps.
    mastr_df = mastr_df.assign(
        start_up_date=pd.to_datetime(mastr_df.start_up_date, utc=True)
    )

    end_len = len(mastr_df)
    logger.debug(
        f"Dropped {init_len - end_len} "
        f"({((init_len - end_len) / init_len) * 100:g}%)"
        f" of {init_len} rows from MaStR DataFrame."
    )

    return mastr_df
428
429
430
def zip_and_municipality_from_standort(
    standort: str,
    verbose: bool = False,
) -> tuple[str, bool]:
    """
    Get zip code and municipality from Standort string split into a list.

    Parameters
    ----------
    standort : str
        Standort as given from MaStR data.
    verbose : bool
        Logs additional info if True.

    Returns
    -------
    tuple of str and bool
        Standort with everything before the zip code stripped off and a
        flag indicating whether a zip code was found. If no zip code was
        found the unmodified input string is returned with ``False``.
    """
    if verbose:
        logger.debug(f"Uncleaned String: {standort}")

    standort_list = standort.split()

    found = False
    count = 0

    for count, elem in enumerate(standort_list):
        # A German zip code is exactly five digits long.
        if len(elem) == 5 and elem.isnumeric():
            found = True
            break

    if found:
        # Keep everything from the zip code onwards (zip + municipality).
        cleaned_str = " ".join(standort_list[count:])

        if verbose:
            logger.debug(f"Cleaned String:   {cleaned_str}")

        return cleaned_str, found

    # No zip code could be identified; the caller drops this entry.
    logger.warning(
        "Couldn't identify zip code. This entry will be dropped."
        f" Original standort: {standort}."
    )

    return standort, found
480
481
482
def most_plausible(
483
    p_tub: tuple,
484
    min_realistic_pv_cap: int | float,
485
) -> float:
486
    """
487
    Try to determine the most plausible capacity.
488
    Try to determine the most plausible capacity from a given
489
    generator from MaStR data.
490
    Parameters
491
    -----------
492
    p_tub : tuple
493
        Tuple containing the different capacities given in
494
        the MaStR data.
495
    min_realistic_pv_cap : int or float
496
        Minimum capacity, which is considered to be realistic.
497
    Returns
498
    -------
499
    float
500
        Capacity of the generator estimated as the most realistic.
501
    """
502
    count = Counter(p_tub).most_common(3)
503
504
    if len(count) == 1:
505
        return count[0][0]
506
507
    val1 = count[0][0]
508
    val2 = count[1][0]
509
510
    if len(count) == 2:
511
        min_val = min(val1, val2)
512
        max_val = max(val1, val2)
513
    else:
514
        val3 = count[2][0]
515
516
        min_val = min(val1, val2, val3)
517
        max_val = max(val1, val2, val3)
518
519
    if min_val < min_realistic_pv_cap:
520
        return max_val
521
522
    return min_val
523
524
525
def geocoder(
    user_agent: str,
    min_delay_seconds: int,
) -> RateLimiter:
    """
    Set up the Nominatim geocoding callable.

    Parameters
    ----------
    user_agent : str
        The app name.
    min_delay_seconds : int
        Delay in seconds to use between requests to Nominatim.
        A minimum of 1 is advised.

    Returns
    -------
    geopy.extra.rate_limiter.RateLimiter
        Nominatim RateLimiter geocoding class to use for geocoding.
    """
    nominatim = Nominatim(user_agent=user_agent)

    # Wrap the geocode call so consecutive requests are throttled.
    return RateLimiter(
        nominatim.geocode,
        min_delay_seconds=min_delay_seconds,
    )
548
549
550
def geocoding_data(
    clean_mastr_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Set up the DataFrame to geocode.

    Parameters
    ----------
    clean_mastr_df : pandas.DataFrame
        DataFrame containing cleaned MaStR data.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing all unique combinations of
        zip codes with municipalities for geocoding.
    """
    # Each unique zip/municipality string only needs to be geocoded once.
    unique_combinations = clean_mastr_df.zip_and_municipality.unique()

    return pd.DataFrame({"zip_and_municipality": unique_combinations})
569
570
571
@timer_func
def geocode_data(
    geocoding_df: pd.DataFrame,
    ratelimiter: RateLimiter,
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Geocode zip code and municipality.

    Extract latitude, longitude and altitude.
    Transform latitude and longitude to shapely
    Point and return a geopandas GeoDataFrame.

    Parameters
    ----------
    geocoding_df : pandas.DataFrame
        DataFrame containing all unique combinations of
        zip codes with municipalities for geocoding.
    ratelimiter : geopy.extra.rate_limiter.RateLimiter
        Nominatim RateLimiter geocoding class to use for geocoding.
    epsg : int
        EPSG ID to use as CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing all unique combinations of
        zip codes with municipalities with matching geolocation.
    """
    logger.info(f"Geocoding {len(geocoding_df)} locations.")

    # One Nominatim request per unique location; the rate limiter enforces
    # the configured minimum delay between consecutive requests.
    geocode_df = geocoding_df.assign(
        location=geocoding_df.zip_and_municipality.apply(ratelimiter)
    )

    # Failed lookups return None, so point stays None for those rows.
    geocode_df = geocode_df.assign(
        point=geocode_df.location.apply(
            lambda loc: tuple(loc.point) if loc else None
        )
    )

    # Split the (latitude, longitude, altitude) tuples into three columns.
    geocode_df[["latitude", "longitude", "altitude"]] = pd.DataFrame(
        geocode_df.point.tolist(), index=geocode_df.index
    )

    return gpd.GeoDataFrame(
        geocode_df,
        geometry=gpd.points_from_xy(geocode_df.longitude, geocode_df.latitude),
        crs=f"EPSG:{epsg}",
    )
618
619
620
def merge_geocode_with_mastr(
    clean_mastr_df: pd.DataFrame, geocode_gdf: gpd.GeoDataFrame
) -> gpd.GeoDataFrame:
    """
    Merge geometry to original mastr data.

    Parameters
    ----------
    clean_mastr_df : pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    geocode_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing all unique combinations of
        zip codes with municipalities with matching geolocation.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing cleaned MaStR data with
        matching geolocation from geocoding.
    """
    # Left-join the geocoded point onto each generator via its
    # zip/municipality string, then restore the original generator index.
    merged_df = clean_mastr_df.merge(
        geocode_gdf[["zip_and_municipality", "geometry"]],
        how="left",
        on="zip_and_municipality",
    )

    return gpd.GeoDataFrame(merged_df, crs=geocode_gdf.crs).set_index(
        clean_mastr_df.index
    )
647
648
649
def drop_invalid_entries_from_gdf(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop invalid entries from geopandas GeoDataFrame.

    TODO: how to omit the logging from geos here???

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame to be checked for validity.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with rows with invalid geometries
        dropped.
    """
    # Keep only rows whose geometry passes the GEOS validity check.
    valid_gdf = gdf.loc[gdf.is_valid]

    n_dropped = len(gdf) - len(valid_gdf)

    logger.debug(
        f"{n_dropped} "
        f"({n_dropped / len(gdf) * 100:g}%) "
        f"of {len(gdf)} values were invalid and are dropped."
    )

    return valid_gdf
674
675
676
@timer_func
def municipality_data() -> gpd.GeoDataFrame:
    """
    Get municipality data from eGo^n Database.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with municipality data, indexed by AGS.
    """
    # NOTE(review): the query object is used after the session scope exits;
    # this works here because the statement is only executed by read_postgis
    # below -- confirm this matches db.session_scope semantics.
    with db.session_scope() as session:
        query = session.query(Vg250Gem.ags, Vg250Gem.geometry.label("geom"))

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="ags"
    )
691
692
693
@timer_func
def add_ags_to_gens(
    valid_mastr_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to generators.

    Parameters
    ----------
    valid_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with AGS ID added.
    """
    joined_gdf = valid_mastr_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # The municipality index (AGS) ends up in "index_right" after the join.
    return joined_gdf.rename(columns={"index_right": "ags"})
717
718
719
def drop_gens_outside_muns(
    valid_mastr_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all generators outside of municipalities.

    Parameters
    ----------
    valid_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with generators without an AGS ID dropped.
    """
    # Generators without an AGS got no municipality match in the sjoin.
    inside_gdf = valid_mastr_gdf.loc[valid_mastr_gdf.ags.notna()]

    total = len(valid_mastr_gdf)
    dropped = total - len(inside_gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the municipalities"
        " and are therefore dropped."
    )

    return inside_gdf
744
745
746
class EgonMastrPvRoofGeocoded(Base):
    """Geocoding result per unique zip code and municipality combination."""

    __tablename__ = "egon_mastr_pv_roof_geocoded"
    __table_args__ = {"schema": "supply"}

    # Geocoding key, e.g. "<zip> <municipality>, Deutschland".
    zip_and_municipality = Column(String, primary_key=True, index=True)
    # Stringified Nominatim response for the key.
    location = Column(String)
    # Stringified (latitude, longitude, altitude) tuple.
    point = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    altitude = Column(Float)
    # Geocoded point geometry (see module-level EPSG constant: 4326).
    geometry = Column(Geometry(srid=EPSG))
757
758
759
def create_geocoded_table(geocode_gdf):
    """
    Create geocoded table mastr pv rooftop.

    Drops and recreates the target table, then writes the geocoding
    results, so repeated runs remain idempotent.

    Parameters
    ----------
    geocode_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoding information on pv rooftop locations.
    """
    EgonMastrPvRoofGeocoded.__table__.drop(bind=engine, checkfirst=True)
    EgonMastrPvRoofGeocoded.__table__.create(bind=engine, checkfirst=True)

    # Consistency fix: reuse the module-level engine (as the DDL above does)
    # instead of opening a second engine via db.engine().
    geocode_gdf.to_postgis(
        name=EgonMastrPvRoofGeocoded.__table__.name,
        schema=EgonMastrPvRoofGeocoded.__table__.schema,
        con=engine,
        if_exists="append",
        index=False,
    )
778
779
780
def geocoded_data_from_db(
    epsg: str | int,
) -> gpd.GeoDataFrame:
    """
    Read geocoded zip code and municipality combinations from the
    eGo^n Database.

    Parameters
    ----------
    epsg : str or int
        EPSG ID of the CRS to transform the geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing the geocoded zip code and municipality
        combinations.
    """
    with db.session_scope() as session:
        query = session.query(
            EgonMastrPvRoofGeocoded.zip_and_municipality,
            EgonMastrPvRoofGeocoded.geometry,
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, geom_col="geometry"
    ).to_crs(f"EPSG:{epsg}")
803
804
805
def load_mastr_data():
    """Read PV rooftop data from MaStR CSV

    Note: the source will be replaced as soon as the MaStR data is available
    in DB.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    """
    # Raw CSV read restricted to the relevant columns, dtypes and dates.
    mastr_df = mastr_data(
        MASTR_INDEX_COL,
        MASTR_RELEVANT_COLS,
        MASTR_DTYPES,
        MASTR_PARSE_DATES,
    )

    # Drop duplicates/implausible capacities and derive start-up dates.
    clean_mastr_df = clean_mastr_data(
        mastr_df,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
        rounding=ROUNDING,
    )

    # Previously geocoded zip/municipality combinations from the database.
    geocode_gdf = geocoded_data_from_db(EPSG)

    mastr_gdf = merge_geocode_with_mastr(clean_mastr_df, geocode_gdf)

    # Remove rows whose geometry is missing or invalid after the merge.
    valid_mastr_gdf = drop_invalid_entries_from_gdf(mastr_gdf)

    municipalities_gdf = municipality_data()

    valid_mastr_gdf = add_ags_to_gens(valid_mastr_gdf, municipalities_gdf)

    return drop_gens_outside_muns(valid_mastr_gdf)
840
841
842
class OsmBuildingsFiltered(Base):
    """ORM mapping of the filtered OSM buildings table (read access only)."""

    __tablename__ = "osm_buildings_filtered"
    __table_args__ = {"schema": "openstreetmap"}

    osm_id = Column(BigInteger)  # original OSM object ID
    amenity = Column(String)
    building = Column(String)
    name = Column(String)
    geom = Column(Geometry(srid=SRID), index=True)  # building footprint
    area = Column(Float)  # footprint area
    # Representative point of the building -- presumably the centroid;
    # confirm against the table creation code.
    geom_point = Column(Geometry(srid=SRID), index=True)
    tags = Column(HSTORE)  # raw OSM tags
    id = Column(BigInteger, primary_key=True, index=True)
855
856
857
@timer_func
def osm_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read OSM buildings data from eGo^n Database.

    Parameters
    ----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    """
    # Only the ID, footprint area and the building point are needed here;
    # the point is labelled "geom" so read_postgis picks it up as the
    # default geometry column.
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsFiltered.id,
            OsmBuildingsFiltered.area,
            OsmBuildingsFiltered.geom_point.label("geom"),
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
882
883
884
@timer_func
def synthetic_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read synthetic buildings data from eGo^n Database.

    Parameters
    ----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing synthetic buildings data.
    """
    # Only the ID, footprint area and the building point are needed here;
    # the point is labelled "geom" so read_postgis picks it up as the
    # default geometry column.
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsSynthetic.id,
            OsmBuildingsSynthetic.area,
            OsmBuildingsSynthetic.geom_point.label("geom"),
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
909
910
911
@timer_func
def add_ags_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to buildings.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with AGS ID added.
    """
    joined_gdf = buildings_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # The municipality index (AGS) ends up in "index_right" after the join.
    return joined_gdf.rename(columns={"index_right": "ags"})
935
936
937
def drop_buildings_outside_muns(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of municipalities.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without an AGS ID dropped.
    """
    # Buildings without an AGS got no municipality match in the sjoin.
    inside_gdf = buildings_gdf.loc[buildings_gdf.ags.notna()]

    total = len(buildings_gdf)
    dropped = total - len(inside_gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the municipalities "
        "and are therefore dropped."
    )

    return inside_gdf
962
963
964
def egon_building_peak_loads(scenario: str = "eGon2035"):
    """
    Get the IDs of buildings with an electricity peak load.

    Parameters
    ----------
    scenario : str
        Scenario name used to filter the peak loads. Defaults to
        "eGon2035" to stay backward compatible with previous behavior.

    Returns
    -------
    numpy.ndarray
        Sorted array of unique building IDs.
    """
    # Scenario names come from the internal SCENARIOS constant, not from
    # user input, so simple string interpolation is acceptable here.
    sql = f"""
    SELECT building_id
    FROM demand.egon_building_electricity_peak_loads
    WHERE scenario = '{scenario}'
    """

    return (
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
    )
974
975
976
@timer_func
def load_building_data():
    """
    Read buildings from DB.

    Tables:

    * `openstreetmap.osm_buildings_filtered` (from OSM)
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)

    Use column `id` for both as it is unique hence you concat both datasets.
    If INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be
    loaded.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped and bus IDs merged in from
        ``boundaries.egon_map_zensus_mvgd_buildings``.
    """
    municipalities_gdf = municipality_data()

    # OSM buildings are reprojected to the municipalities' CRS on load.
    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)

    if INCLUDE_SYNTHETIC_BUILDINGS:
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)

        # Concatenate real and synthetic buildings into one frame; the `id`
        # index is unique across both sources (see docstring).
        buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [
                    osm_buildings_gdf,
                    synthetic_buildings_gdf,
                ]
            ),
            geometry="geom",
            crs=osm_buildings_gdf.crs,
        ).rename(columns={"area": "building_area"})

        buildings_gdf.index = buildings_gdf.index.astype(int)

    else:
        buildings_gdf = osm_buildings_gdf.rename(
            columns={"area": "building_area"}
        )

    if ONLY_BUILDINGS_WITH_DEMAND:
        # Restrict to buildings that have an electricity peak load entry.
        building_ids = egon_building_peak_loads()

        init_len = len(building_ids)

        building_ids = np.intersect1d(
            list(map(int, building_ids)),
            list(map(int, buildings_gdf.index.to_numpy())),
        )

        end_len = len(building_ids)

        logger.debug(
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) of buildings have "
            f"peak load."
        )

        buildings_gdf = buildings_gdf.loc[building_ids]

    # Tag every building with its municipality (AGS) and drop buildings that
    # fall outside all municipalities.
    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)

    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)

    # Intersect grid districts with federal states so buildings can be
    # assigned to (grid district, federal state) overlay regions.
    grid_districts_gdf = grid_districts(EPSG)

    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)

    grid_federal_state_gdf = overlay_grid_districts_with_counties(
        grid_districts_gdf,
        federal_state_gdf,
    )

    buildings_overlay_gdf = add_overlay_id_to_buildings(
        buildings_ags_gdf,
        grid_federal_state_gdf,
    )

    logger.debug("Loaded buildings.")

    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)

    # overwrite bus_id with data from new table
    sql = "SELECT building_id, bus_id FROM boundaries.egon_map_zensus_mvgd_buildings"
    map_building_bus_df = db.select_dataframe(sql)

    # Keep only buildings present in the building -> bus mapping table.
    building_ids = np.intersect1d(
        list(map(int, map_building_bus_df.building_id.unique())),
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
    )

    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]

    # Replace the bus_id from the overlay with the authoritative mapping.
    return (
        buildings_within_gdf.drop(columns=["bus_id"])
        .merge(
            how="left",
            right=map_building_bus_df,
            left_index=True,
            right_on="building_id",
        )
        .drop(columns=["building_id"])
    )
1082
1083
1084
@timer_func
def sort_and_qcut_df(
    df: pd.DataFrame | gpd.GeoDataFrame,
    col: str,
    q: int,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Determine the quantile of a given attribute in a (Geo)DataFrame.

    Sort the (Geo)DataFrame in ascending order for the given attribute and
    add a ``quant`` column holding the quantile bin (0 .. q-1) of each row.

    Parameters
    -----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        (Geo)DataFrame to sort and qcut.
    col : str
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
    q : int
        Number of quantiles.
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Sorted and qcut (Geo)DataFrame.
    """
    sorted_df = df.sort_values(col, ascending=True)

    # Equal-frequency binning: each label in range(q) gets roughly the same
    # number of rows.
    quantile_bins = pd.qcut(
        sorted_df[col],
        q=q,
        labels=range(q),
    )

    return sorted_df.assign(quant=quantile_bins)
1115
1116
1117
@timer_func
def allocate_pv(
    q_mastr_gdf: gpd.GeoDataFrame,
    q_buildings_gdf: gpd.GeoDataFrame,
    seed: int,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate the MaStR pv generators to the OSM buildings.

    This will determine a building for each pv generator if there are more
    buildings than generators within a given AGS. Primarily generators are
    distributed with the same quantile as the buildings. Multiple assignment
    is excluded.

    Parameters
    -----------
    q_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded and qcut MaStR data.
    q_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing qcut OSM buildings data.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    rng = default_rng(seed=seed)

    # Cross-reference columns, filled in as matches are made.
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan)
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan)

    ags_list = q_buildings_gdf.ags.unique()

    if TEST_RUN:
        # Cap the workload for test runs.
        ags_list = ags_list[:250]

    num_ags = len(ags_list)

    t0 = perf_counter()

    for count, ags in enumerate(ags_list):

        # Only consider not-yet-assigned buildings and generators of this AGS.
        buildings = q_buildings_gdf.loc[
            (q_buildings_gdf.ags == ags) & (q_buildings_gdf.gens_id.isna())
        ]
        gens = q_mastr_gdf.loc[
            (q_mastr_gdf.ags == ags) & (q_mastr_gdf.building_id.isna())
        ]

        len_build = len(buildings)
        len_gens = len(gens)

        if len_build < len_gens:
            # More generators than buildings: randomly drop the surplus
            # generators so every remaining one can get its own building.
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
            logger.error(
                f"There are {len_gens} generators and only {len_build}"
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
                "generators were truncated to match the amount of buildings."
            )

            assert len_build == len(gens)

        for quant in gens.quant.unique():
            # Prefer buildings in the same size quantile as the generators.
            q_buildings = buildings.loc[
                (buildings.quant == quant) & (buildings.gens_id.isna())
            ]
            q_gens = gens.loc[gens.quant == quant]

            len_build = len(q_buildings)
            len_gens = len(q_gens)

            if len_build < len_gens:
                # Not enough buildings in this quantile: top up with random
                # unassigned buildings from other quantiles of the same AGS.
                delta = len_gens - len_build

                logger.warning(
                    f"There are {len_gens} generators and only {len_build} "
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
                    f"buildings from AGS {ags} will be added randomly."
                )

                add_buildings = pd.Index(
                    rng.choice(
                        buildings.loc[
                            (buildings.quant != quant)
                            & (buildings.gens_id.isna())
                        ].index,
                        size=delta,
                        replace=False,
                    )
                )

                q_buildings = buildings.loc[
                    q_buildings.index.append(add_buildings)
                ]

                assert len(q_buildings) == len_gens

            # Draw one distinct building per generator (replace=False
            # guarantees no double assignment within this draw).
            chosen_buildings = pd.Index(
                rng.choice(
                    q_buildings.index,
                    size=len_gens,
                    replace=False,
                )
            )

            # q_mastr_gdf.loc[q_gens.index, "building_id"] = chosen_buildings
            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index

        if count % 100 == 0:
            # Periodic progress logging.
            logger.debug(
                f"Allocation of {count / num_ags * 100:g} % of AGS done. It took "
                f"{perf_counter() - t0:g} seconds."
            )

            t0 = perf_counter()

    # Mirror the building -> generator assignments back onto the MaStR frame.
    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]

    q_mastr_gdf.loc[
        assigned_buildings.gens_id, "building_id"
    ] = assigned_buildings.index

    logger.debug("Allocated status quo generators to buildings.")

    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
1242
1243
1244
def frame_to_numeric(
    df: pd.DataFrame | gpd.GeoDataFrame,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Try to convert all object columns of a DataFrame to numeric.

    Columns (and an object-dtype index) that cannot be converted are left
    unchanged. The frame is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Frame whose object-dtype index/columns should be converted.
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        The same frame with convertible object columns cast to numeric.
    """

    def _to_numeric_or_keep(values):
        # pd.to_numeric(errors="ignore") is deprecated and removed in
        # pandas 3.0; emulate its behavior by returning the original
        # values when the conversion fails.
        try:
            return pd.to_numeric(values)
        except (ValueError, TypeError):
            return values

    if str(df.index.dtype) == "object":
        df.index = _to_numeric_or_keep(df.index)

    for col in df.columns:
        if str(df[col].dtype) == "object":
            df[col] = _to_numeric_or_keep(df[col])

    return df
1264
1265
1266
def validate_output(
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
) -> None:
    """
    Validate output.

    * Validate that there are exactly as many buildings with a pv system as
      there are pv systems with a building
    * Validate that the building IDs with a pv system are the same building
      IDs as assigned to the pv systems
    * Validate that the pv system IDs with a building are the same pv system
      IDs as assigned to the buildings

    Parameters
    -----------
    desagg_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    desagg_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    allocated_gens = desagg_mastr_gdf.loc[
        ~desagg_mastr_gdf.building_id.isna()
    ]
    allocated_buildings = desagg_buildings_gdf.loc[
        ~desagg_buildings_gdf.gens_id.isna()
    ]

    # One building per allocated generator and vice versa.
    assert len(allocated_gens) == len(allocated_buildings)

    # Building IDs referenced by generators == buildings holding a generator.
    assert (
        np.sort(allocated_gens.building_id.unique())
        == np.sort(allocated_buildings.index.unique())
    ).all()

    # Generator IDs referenced by buildings == generators holding a building.
    assert (
        np.sort(allocated_gens.index.unique())
        == np.sort(allocated_buildings.gens_id.unique())
    ).all()

    logger.debug("Validated output.")
1316
1317
1318
def drop_unallocated_gens(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop generators which did not get allocated.

    A generator counts as unallocated when its ``building_id`` is ``NaN``.

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with generators dropped which did
        not get allocated.
    """
    init_len = len(gdf)

    allocated_gdf = gdf.loc[~gdf.building_id.isna()]

    dropped = init_len - len(allocated_gdf)

    logger.debug(
        f"Dropped {dropped} "
        f"({(dropped / init_len) * 100:g}%)"
        f" of {init_len} unallocated rows from MaStR DataFrame."
    )

    return allocated_gdf
1345
1346
1347
@timer_func
def allocate_to_buildings(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate status quo pv rooftop generators to buildings.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    logger.debug("Starting allocation of status quo.")

    # Bin generators by capacity and buildings by roof area into Q quantiles
    # so that large generators preferentially land on large buildings.
    binned_mastr_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
    binned_buildings_gdf = sort_and_qcut_df(
        buildings_gdf, col="building_area", q=Q
    )

    allocated_mastr_gdf, allocated_buildings_gdf = allocate_pv(
        binned_mastr_gdf, binned_buildings_gdf, SEED
    )

    validate_output(allocated_mastr_gdf, allocated_buildings_gdf)

    return (
        drop_unallocated_gens(allocated_mastr_gdf),
        allocated_buildings_gdf,
    )
1379
1380
1381
@timer_func
def grid_districts(
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Load mv grid district geo data from eGo^n Database as
    geopandas.GeoDataFrame.

    Parameters
    -----------
    epsg : int
        EPSG ID to use as CRS.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data,
        indexed by integer bus ID.
    """
    sql = """
        SELECT bus_id, geom
        FROM grid.egon_mv_grid_district
        ORDER BY bus_id
        """

    districts_gdf = db.select_geodataframe(
        sql,
        index_col="bus_id",
        geom_col="geom",
        epsg=epsg,
    )

    # Ensure the bus IDs are plain integers.
    districts_gdf.index = districts_gdf.index.astype(int)

    logger.debug("Grid districts loaded.")

    return districts_gdf
1413
1414
1415
def scenario_data(
    carrier: str = "solar_rooftop",
    scenario: str = "eGon2035",
) -> pd.DataFrame:
    """
    Get scenario capacity data from eGo^n Database.

    Parameters
    -----------
    carrier : str
        Carrier type to filter table by.
    scenario : str
        Scenario to filter table by.
    Returns
    -------
    pandas.DataFrame
        DataFrame with scenario capacity data in GW, sorted by index.
    """
    with db.session_scope() as session:
        query = session.query(EgonScenarioCapacities).filter(
            EgonScenarioCapacities.carrier == carrier,
            EgonScenarioCapacities.scenario_name == scenario,
        )

    # NOTE(review): the query is executed outside the session_scope block via
    # query.session.bind — presumably the bind outlives the session here;
    # confirm this still holds if session handling changes.
    df = pd.read_sql(
        query.statement, query.session.bind, index_col="index"
    ).sort_index()

    logger.debug("Scenario capacity data loaded.")

    return df
1445
1446
1447 View Code Duplication
class Vg250Lan(Base):
    """
    ORM mapping for the ``boundaries.vg250_lan`` federal state table.

    Column names follow the BKG vg250 dataset conventions; only ``id``,
    ``nuts`` and ``geometry`` are used by this module (see
    ``federal_state_data``).
    """

    __tablename__ = "vg250_lan"
    __table_args__ = {"schema": "boundaries"}

    id = Column(BigInteger, primary_key=True, index=True)
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    # NUTS region code, used to join scenario capacities per federal state.
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    # Federal state geometry in the project-wide EPSG.
    geometry = Column(Geometry(srid=EPSG), index=True)
1479
1480
1481
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
    """
    Get federal state data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with federal state data (ID index, NUTS code and
        geometry reprojected to ``to_crs``).
    """
    with db.session_scope() as session:
        query = session.query(
            Vg250Lan.id, Vg250Lan.nuts, Vg250Lan.geometry.label("geom")
        )

        # Read while the session is open, then reproject to the target CRS.
        federal_states_gdf = gpd.read_postgis(
            query.statement, session.connection(), index_col="id"
        ).to_crs(to_crs)

    logger.debug("Federal State data loaded.")

    return federal_states_gdf
1505
1506
1507
@timer_func
def overlay_grid_districts_with_counties(
    mv_grid_district_gdf: gpd.GeoDataFrame,
    federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Calculate the intersections of mv grid districts and counties.

    Parameters
    -----------
    mv_grid_district_gdf : gpd.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    federal_state_gdf : gpd.GeoDataFrame
        GeoDataFrame with federal state data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with one row per (federal state, grid district)
        intersection shape.
    """
    logger.debug(
        "Calculating intersection overlay between mv grid districts and "
        "counties. This may take a while..."
    )

    # Both layers must share a CRS before intersecting.
    states_gdf = federal_state_gdf.to_crs(mv_grid_district_gdf.crs)

    overlay_gdf = gpd.overlay(
        states_gdf,
        mv_grid_district_gdf.reset_index(),
        how="intersection",
        keep_geom_type=True,
    )

    logger.debug("Done!")

    return overlay_gdf
1540
1541
1542
@timer_func
def add_overlay_id_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    grid_federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about overlay ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    grid_federal_state_gdf : geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between counties and grid
        districts.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with overlay ID added.
    """
    # Reproject buildings to the overlay CRS before the spatial join.
    reprojected_gdf = buildings_gdf.to_crs(grid_federal_state_gdf.crs)

    joined_gdf = reprojected_gdf.sjoin(
        grid_federal_state_gdf,
        how="left",
        predicate="intersects",
    ).rename(columns={"index_right": "overlay_id"})

    logger.debug("Added overlay ID to OSM buildings.")

    return joined_gdf
1573
1574
1575
def drop_buildings_outside_grids(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of grid areas.

    A building is considered outside when its ``bus_id`` column is ``NaN``,
    i.e. it was not matched to any grid district overlay shape.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without a
        bus ID dropped.
    """
    within_gdf = buildings_gdf.loc[~buildings_gdf.bus_id.isna()]

    total = len(buildings_gdf)
    dropped = total - len(within_gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the grid areas "
        "and are therefore dropped."
    )

    return within_gdf
1600
1601
1602
def cap_per_bus_id(
    scenario: str,
) -> pd.DataFrame:
    """
    Get table with total pv rooftop capacity per grid district.

    Parameters
    -----------
    scenario : str
        Scenario name.
    Returns
    -------
    pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid, indexed by
        ``bus_id``.
    """
    targets = config.datasets()["solar_rooftop"]["targets"]

    # NOTE(review): `scenario` and the target table names are interpolated
    # directly into the SQL statement; this is only safe because they come
    # from internal configuration, never from user input.
    sql = f"""
    SELECT bus as bus_id, p_nom as capacity
    FROM {targets['generators']['schema']}.{targets['generators']['table']}
    WHERE carrier = 'solar_rooftop'
    AND scn_name = '{scenario}'
    """

    return db.select_dataframe(sql, index_col="bus_id")
1642
1643
1644
def determine_end_of_life_gens(
    mastr_gdf: gpd.GeoDataFrame,
    scenario_timestamp: pd.Timestamp,
    pv_rooftop_lifetime: pd.Timedelta,
) -> gpd.GeoDataFrame:
    """
    Determine if an old PV system has reached its end of life.

    Adds an ``age`` column (time since start-up at the scenario timestamp)
    and a boolean ``end_of_life`` column.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    scenario_timestamp : pandas.Timestamp
        Timestamp at which the scenario takes place.
    pv_rooftop_lifetime : pandas.Timedelta
        Average expected lifetime of PV rooftop systems.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data and info if the system
        has reached its end of life.
    """
    capacity_before = mastr_gdf.capacity.sum()

    age = scenario_timestamp - mastr_gdf.start_up_date

    mastr_gdf = mastr_gdf.assign(
        age=age,
        end_of_life=pv_rooftop_lifetime < age,
    )

    capacity_after = mastr_gdf.loc[~mastr_gdf.end_of_life].capacity.sum()

    logger.debug(
        "Determined if pv rooftop systems reached their end of life.\nTotal capacity: "
        f"{capacity_before}\nActive capacity: {capacity_after}"
    )

    return mastr_gdf
1683
1684
1685
def calculate_max_pv_cap_per_building(
    buildings_gdf: gpd.GeoDataFrame,
    mastr_gdf: gpd.GeoDataFrame,
    pv_cap_per_sq_m: float | int,
    roof_factor: float | int,
) -> gpd.GeoDataFrame:
    """
    Calculate the estimated maximum possible PV capacity per building.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    roof_factor : float, int
        Average for PV usable roof area share.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with estimated maximum PV
        capacity.
    """
    # MaStR attributes merged onto buildings via the building ID.
    mastr_cols = [
        "capacity",
        "end_of_life",
        "building_id",
        "EinheitlicheAusrichtungUndNeigungswinkel",
        "Hauptausrichtung",
        "HauptausrichtungNeigungswinkel",
    ]

    merged_gdf = (
        buildings_gdf.reset_index()
        .rename(columns={"index": "id"})
        .merge(
            mastr_gdf[mastr_cols],
            how="left",
            left_on="id",
            right_on="building_id",
        )
        .set_index("id")
        .drop(columns="building_id")
    )

    # Usable roof area times capacity density gives the potential per roof.
    max_cap = merged_gdf.building_area.multiply(
        roof_factor * pv_cap_per_sq_m
    )

    return merged_gdf.assign(
        max_cap=max_cap,
        # Buildings without an existing generator get end_of_life=True.
        end_of_life=merged_gdf.end_of_life.fillna(True).astype(bool),
        bus_id=merged_gdf.bus_id.astype(int),
    )
1736
1737
1738
def calculate_building_load_factor(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
    rounding: int = 4,
) -> gpd.GeoDataFrame:
    """
    Calculate the roof load factor from existing PV systems.

    The load factor is the installed capacity divided by the estimated
    maximum capacity of the assigned building.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    rounding : int
        Rounding to use for load factor.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data with calculated load
        factor.
    """
    # Only buildings with a known maximum capacity can yield a load factor.
    buildings_with_cap = buildings_gdf[["max_cap", "building_area"]].loc[
        ~buildings_gdf["max_cap"].isna()
    ]

    merged_gdf = mastr_gdf.merge(
        buildings_with_cap,
        how="left",
        left_on="building_id",
        right_index=True,
    )

    load_factor = (merged_gdf.capacity / merged_gdf.max_cap).round(rounding)

    return merged_gdf.assign(load_factor=load_factor)
1768
1769
1770
def get_probability_for_property(
    mastr_gdf: gpd.GeoDataFrame,
    cap_range: tuple[int | float, int | float],
    prop: str,
) -> tuple[np.array, np.array]:
    """
    Calculate the probability of the different options of a property of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_range : tuple(int, int)
        Capacity range of PV plants to look at (lower exclusive, upper
        inclusive).
    prop : str
        Property to calculate probabilities for. String needs to be in
        columns of mastr_gdf.
    Returns
    -------
    tuple
        numpy.array
            Unique values of property.
        numpy.array
            Probabilities per unique value.
    """
    lower, upper = cap_range

    cap_range_gdf = mastr_gdf.loc[
        (mastr_gdf.capacity > lower) & (mastr_gdf.capacity <= upper)
    ]

    if prop == "load_factor":
        # Load factors above 1 are implausible and would skew the
        # distribution, so they are excluded.
        cap_range_gdf = cap_range_gdf.loc[cap_range_gdf[prop] <= 1]

    # Drop missing values, including the literal string "None".
    valid_values = cap_range_gdf[prop].loc[
        ~cap_range_gdf[prop].isna()
        & ~cap_range_gdf[prop].isnull()
        & ~(cap_range_gdf[prop] == "None")
    ]

    counts = Counter(valid_values)

    values = np.array(list(counts.keys()))
    frequencies = np.fromiter(counts.values(), dtype=float)

    return values, frequencies / np.sum(frequencies)
1816
1817
1818
@timer_func
def probabilities(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    properties: list[str] | None = None,
) -> dict:
    """
    Calculate the probability of the different options of properties of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple
        should start with a zero and the last one should end with infinite.
    properties : list(str)
        List of properties to calculate probabilities for. Strings need to
        be in columns of mastr_gdf.
    Returns
    -------
    dict
        Dictionary with values and probabilities per capacity range.
    """
    if cap_ranges is None:
        cap_ranges = [
            (0, 30),
            (30, 100),
            (100, float("inf")),
        ]
    if properties is None:
        properties = [
            "EinheitlicheAusrichtungUndNeigungswinkel",
            "Hauptausrichtung",
            "HauptausrichtungNeigungswinkel",
            "load_factor",
        ]

    prob_dict = {}

    for cap_range in cap_ranges:
        range_values = {}
        range_probabilities = {}

        for prop in properties:
            values, probs = get_probability_for_property(
                mastr_gdf,
                cap_range,
                prop,
            )

            range_values[prop] = values
            range_probabilities[prop] = probs

        prob_dict[cap_range] = {
            "values": range_values,
            "probabilities": range_probabilities,
        }

    return prob_dict
1875
1876
1877
def cap_share_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the share of PV capacity from the total PV capacity within
    capacity ranges.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple
        should start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with share of PV capacity from the total PV capacity
        within capacity ranges.
    """
    if cap_ranges is None:
        cap_ranges = [
            (0, 30),
            (30, 100),
            (100, float("inf")),
        ]

    total_cap = mastr_gdf.capacity.sum()

    cap_share_dict = {}

    for lower, upper in cap_ranges:
        # Lower bound exclusive, upper bound inclusive.
        in_range = mastr_gdf.loc[
            (mastr_gdf.capacity > lower) & (mastr_gdf.capacity <= upper)
        ]

        cap_share_dict[(lower, upper)] = in_range.capacity.sum() / total_cap

    return cap_share_dict
1920
1921
1922
def mean_load_factor_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the mean roof load factor per capacity range from existing PV
    plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Mapping of capacity range -> mean roof load factor.
    """
    if cap_ranges is None:
        cap_ranges = [(0, 30), (30, 100), (100, float("inf"))]

    # Load factors above 1 are implausible (more PV than roof) and are
    # therefore excluded from the statistics. NaN values drop out as well,
    # since the comparison is False for them.
    plausible_gdf = mastr_gdf.loc[mastr_gdf.load_factor <= 1]

    return {
        (lower, upper): plausible_gdf.loc[
            (plausible_gdf.capacity > lower)
            & (plausible_gdf.capacity <= upper)
        ].load_factor.mean()
        for lower, upper in cap_ranges
    }
1960
1961
1962
def building_area_range_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    min_building_size: int | float = 10.0,
    upper_quantile: float = 0.95,
    lower_quantile: float = 0.05,
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
    """
    Estimate normal building area range per capacity range.

    Raw ranges are derived from quantiles of the building areas of existing
    plants per capacity range and afterwards smoothed so that neighbouring
    ranges meet halfway between their respective bounds.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    min_building_size : int, float
        Minimal building size to consider for PV plants.
    upper_quantile : float
        Upper quantile to estimate maximum building size per capacity range.
    lower_quantile : float
        Lower quantile to estimate minimum building size per capacity range.
    Returns
    -------
    dict
        Mapping of capacity range -> estimated (min, max) building area.
    """
    if cap_ranges is None:
        cap_ranges = [(0, 30), (30, 100), (100, float("inf"))]

    last_idx = len(cap_ranges) - 1

    # First pass: raw area bounds per range from empirical quantiles. The
    # lowest range is floored at min_building_size, the highest is open-ended.
    raw_bounds = []
    for idx, (cap_min, cap_max) in enumerate(cap_ranges):
        areas = mastr_gdf.loc[
            (mastr_gdf.capacity > cap_min) & (mastr_gdf.capacity <= cap_max)
        ].building_area

        if idx == 0:
            raw_bounds.append(
                (min_building_size, areas.quantile(upper_quantile))
            )
        elif idx == last_idx:
            raw_bounds.append(
                (areas.quantile(lower_quantile), float("inf"))
            )
        else:
            raw_bounds.append(
                (
                    areas.quantile(lower_quantile),
                    areas.quantile(upper_quantile),
                )
            )

    # Second pass: normalize so adjacent ranges meet halfway between the
    # upper bound of one range and the lower bound of the next.
    normed_dict = {}
    for idx, (cap_range, (min_area, max_area)) in enumerate(
        zip(cap_ranges, raw_bounds)
    ):
        if idx == 0:
            normed_dict[cap_range] = (
                min_area,
                np.mean((raw_bounds[idx + 1][0], max_area)),
            )
        elif idx == last_idx:
            normed_dict[cap_range] = (
                np.mean((raw_bounds[idx - 1][1], min_area)),
                max_area,
            )
        else:
            normed_dict[cap_range] = (
                np.mean((raw_bounds[idx - 1][1], min_area)),
                np.mean((raw_bounds[idx + 1][0], max_area)),
            )

    return normed_dict
2049
2050
2051
@timer_func
2052
def desaggregate_pv_in_mv_grid(
    buildings_gdf: gpd.GeoDataFrame,
    pv_cap: float | int,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    The target capacity is split between capacity ranges according to
    ``cap_share_dict`` and randomly distributed onto buildings whose roof
    area is plausible for the respective range. Sampled capacities are
    rescaled afterwards so their sum matches ``pv_cap``.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing buildings within the grid district.
    pv_cap : float, int
        PV capacity to desaggregate.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    bus_id = int(buildings_gdf.bus_id.iat[0])

    rng = default_rng(seed=kwargs["seed"])
    random_state = RandomState(seed=kwargs["seed"])

    results_df = pd.DataFrame(columns=buildings_gdf.columns)

    for cap_range, share in kwargs["cap_share_dict"].items():
        # Portion of the grid's target capacity assigned to this range.
        pv_cap_range = pv_cap * share

        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]

        # Candidate buildings: not used yet and with a plausible roof area
        # for this capacity range.
        cap_range_buildings_gdf = buildings_gdf.loc[
            ~buildings_gdf.index.isin(results_df.index)
            & (buildings_gdf.building_area > b_area_min)
            & (buildings_gdf.building_area <= b_area_max)
        ]

        mean_load_factor = kwargs["load_factor_dict"][cap_range]
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
            load_factor=np.nan,
            capacity=np.nan,
        )

        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        if total_mean_cap == 0:
            logger.warning(
                f"There are no matching roofs for capacity range {cap_range} "
                f"kW in grid {bus_id}. Using all buildings as fallback."
            )

            # Fallback 1: any building not already carrying a generator.
            cap_range_buildings_gdf = buildings_gdf.loc[
                ~buildings_gdf.index.isin(results_df.index)
            ]

            if len(cap_range_buildings_gdf) == 0:
                logger.warning(
                    "There are no roofs available for capacity range "
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
                )
                # Fallback 2: allow buildings to carry several generators.
                cap_range_buildings_gdf = buildings_gdf.copy()

            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
                load_factor=np.nan,
                capacity=np.nan,
            )

            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        elif total_mean_cap < pv_cap_range:
            logger.warning(
                f"Average roof utilization of the roof area in grid {bus_id} "
                f"and capacity range {cap_range} kW is not sufficient. The "
                "roof utilization will be above average."
            )

        # Sample enough buildings to cover the target capacity on average;
        # at least one building is always drawn.
        frac = max(
            pv_cap_range / total_mean_cap,
            1 / len(cap_range_buildings_gdf),
        )

        samples_gdf = cap_range_buildings_gdf.sample(
            frac=min(1, frac),
            random_state=random_state,
        )

        cap_range_dict = kwargs["prob_dict"][cap_range]

        values_dict = cap_range_dict["values"]
        p_dict = cap_range_dict["probabilities"]

        # Draw a load factor per sampled building from the empirical
        # distribution of existing plants in this capacity range.
        load_factors = rng.choice(
            a=values_dict["load_factor"],
            size=len(samples_gdf),
            p=p_dict["load_factor"],
        )

        samples_gdf = samples_gdf.assign(
            load_factor=load_factors,
            capacity=(
                samples_gdf.building_area
                * load_factors
                * kwargs["pv_cap_per_sq_m"]
            ).clip(lower=0.4),  # enforce a minimal plant size of 0.4 kW
        )

        # Rescale so the sampled capacities sum up to the range target.
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()

        samples_gdf = samples_gdf.assign(
            capacity=(samples_gdf.capacity * missing_factor),
            load_factor=(samples_gdf.load_factor * missing_factor),
        )

        assert np.isclose(
            samples_gdf.capacity.sum(),
            pv_cap_range,
            rtol=1e-03,
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"

        results_df = pd.concat(
            [
                results_df,
                samples_gdf,
            ],
        )

    # Correct remaining rounding errors so the total matches exactly.
    total_missing_factor = pv_cap / results_df.capacity.sum()

    results_df = results_df.assign(
        capacity=(results_df.capacity * total_missing_factor),
    )

    assert np.isclose(
        results_df.capacity.sum(),
        pv_cap,
        rtol=1e-03,
    ), f"{results_df.capacity.sum()} != {pv_cap}"

    return gpd.GeoDataFrame(
        results_df,
        # Fix: take the CRS from the input frame. samples_gdf is undefined
        # when cap_share_dict is empty and the loop body never runs.
        crs=buildings_gdf.crs,
        geometry="geom",
    )
2213
2214
2215
@timer_func
2216
def desaggregate_pv(
    buildings_gdf: gpd.GeoDataFrame,
    cap_df: pd.DataFrame,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Iterates over all grid districts, determines the capacity still missing
    w.r.t. the scenario target and distributes it onto potential buildings
    via :func:`desaggregate_pv_in_mv_grid`.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    cap_df : pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    # Start from all buildings that still carry a living generator.
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]

    building_bus_ids = set(buildings_gdf.bus_id)
    cap_bus_ids = set(cap_df.index)

    logger.debug(
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from capacity: "
        f"{len(cap_bus_ids)}"
    )

    # Fix: log bus IDs present on one side only. The previous size-based
    # comparison only diffed in one direction and missed mismatches when
    # both sets had equal size but different members.
    missing = building_bus_ids ^ cap_bus_ids

    logger.debug(str(missing))

    for bus_id in buildings_gdf.bus_id.unique():
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]

        # Buildings already carrying a generator within this grid.
        pv_installed_gdf = buildings_grid_gdf.loc[
            ~buildings_grid_gdf.end_of_life
        ]

        pv_installed = pv_installed_gdf.capacity.sum()

        # Buildings still available for new generators.
        pot_buildings_gdf = buildings_grid_gdf.drop(
            index=pv_installed_gdf.index
        )

        if len(pot_buildings_gdf) == 0:
            logger.error(
                f"In grid {bus_id} there are no potential buildings to allocate "
                "PV capacity to. The grid is skipped. This message should only "
                "appear doing test runs with few buildings."
            )

            continue

        # Scenario target is given in MW; generators use kW.
        pv_target = cap_df.at[bus_id, "capacity"] * 1000

        logger.debug(f"pv_target: {pv_target}")

        pv_missing = pv_target - pv_installed

        if pv_missing <= 0:
            logger.info(
                f"In grid {bus_id} there is more PV installed ({pv_installed: g}) in "
                f"status Quo than allocated within the scenario ({pv_target: g}). No "
                f"new generators are added."
            )

            continue

        if pot_buildings_gdf.max_cap.sum() < pv_missing:
            logger.error(
                f"In grid {bus_id} there is less PV potential ("
                f"{pot_buildings_gdf.max_cap.sum():g} kW) than allocated PV  capacity "
                f"({pv_missing:g} kW). The average roof utilization will be very high."
            )

        gdf = desaggregate_pv_in_mv_grid(
            buildings_gdf=pot_buildings_gdf,
            pv_cap=pv_missing,
            **kwargs,
        )

        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
        logger.debug(
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
        )

        if not np.isclose(
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
        ):
            logger.warning(
                f"The desired capacity and actual capacity in grid {bus_id} differ.\n"
                f"Desired cap: {pv_target}\nActual cap: "
                f"{gdf.capacity.sum() + pv_installed}"
            )

        allocated_buildings_gdf = pd.concat(
            [
                allocated_buildings_gdf,
                gdf,
            ]
        )

    logger.debug("Desaggregated scenario.")
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
    logger.debug(
        f"Generator capacity: {allocated_buildings_gdf.capacity.sum(): g}"
    )

    return gpd.GeoDataFrame(
        allocated_buildings_gdf,
        # Fix: take the CRS from the input frame. gdf is undefined when
        # every grid district was skipped in the loop above.
        crs=buildings_gdf.crs,
        geometry="geom",
    )
2353
2354
2355
@timer_func
2356
def add_buildings_meta_data(
    buildings_gdf: gpd.GeoDataFrame,
    prob_dict: dict,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Randomly add additional metadata to desaggregated PV plants.

    Missing metadata values (NaN or the string "None") are filled per
    capacity range by drawing from the empirical value distribution of
    existing plants given in ``prob_dict``.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    rng = default_rng(seed=seed)
    # Preserve the building ID as a regular column before manipulating rows.
    buildings_gdf = buildings_gdf.reset_index().rename(
        columns={
            "index": "building_id",
        }
    )

    for (min_cap, max_cap), cap_range_prob_dict in prob_dict.items():
        cap_range_gdf = buildings_gdf.loc[
            (buildings_gdf.capacity >= min_cap)
            & (buildings_gdf.capacity < max_cap)
        ]

        for key, values in cap_range_prob_dict["values"].items():
            if key == "load_factor":
                # load_factor is already set during desaggregation.
                continue

            # Rows with missing metadata for this property. Fix: drop the
            # redundant isnull() check - Series.isnull is an alias of
            # Series.isna. The "None" string covers stringified missings.
            gdf = cap_range_gdf.loc[
                cap_range_gdf[key].isna()
                | (cap_range_gdf[key] == "None")
            ]

            key_vals = rng.choice(
                a=values,
                size=len(gdf),
                p=cap_range_prob_dict["probabilities"][key],
            )

            buildings_gdf.loc[gdf.index, key] = key_vals

    return buildings_gdf
2410
2411
2412
def add_voltage_level(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add voltage level derived from generator capacity to the power plants.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with voltage level per generator.
    """
    # Capacity thresholds (kW, exclusive upper bound) and the voltage level
    # assigned below each of them; anything above the last bound is level 1.
    thresholds = (
        (100, 7),
        (200, 6),
        (5500, 5),
        (20000, 4),
        (120000, 3),
    )

    def _voltage_level(capacity: float) -> int:
        for upper_bound, level in thresholds:
            if capacity < upper_bound:
                return level
        return 1

    return buildings_gdf.assign(
        voltage_level=buildings_gdf.capacity.apply(_voltage_level)
    )
2444
2445
2446
def add_start_up_date(
    buildings_gdf: gpd.GeoDataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    seed: int,
):
    """
    Randomly and linear add start-up date to new pv generators.

    Each generator gets a day drawn uniformly from the daily date range
    between `start` and `end` (inclusive).

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    start : pandas.Timestamp
        Minimum Timestamp to use.
    end : pandas.Timestamp
        Maximum Timestamp to use.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with start-up date added.
    """
    candidate_days = pd.date_range(start=start, end=end, freq="1D")

    picked_days = default_rng(seed=seed).choice(
        candidate_days, size=len(buildings_gdf)
    )

    return buildings_gdf.assign(start_up_date=picked_days)
2477
2478
2479
@timer_func
2480
def allocate_scenarios(
    mastr_gdf: gpd.GeoDataFrame,
    valid_buildings_gdf: gpd.GeoDataFrame,
    last_scenario_gdf: gpd.GeoDataFrame,
    scenario: str,
):
    """
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.

    Derives all statistical inputs (probabilities, capacity shares, load
    factors, plausible building area ranges) from the MaStR data and then
    distributes the scenario's per-grid capacity targets onto buildings.

    Parameters
    ----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    valid_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    last_scenario_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings matched with pv generators from temporal
        preceding scenario.
    scenario : str
        Scenario to desaggrgate and allocate.
    Returns
    -------
    tuple
        geopandas.GeoDataFrame
            GeoDataFrame containing OSM buildings matched with pv generators.
        pandas.DataFrame
            DataFrame containing pv rooftop capacity per grid id.
    """
    # Scenario capacity targets per mv grid district (bus).
    cap_per_bus_id_df = cap_per_bus_id(scenario)

    logger.debug(
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
    )

    # Flag generators from the preceding scenario that exceed their lifetime
    # by the timestamp of this scenario.
    last_scenario_gdf = determine_end_of_life_gens(
        last_scenario_gdf,
        SCENARIO_TIMESTAMP[scenario],
        PV_ROOFTOP_LIFETIME,
    )

    buildings_gdf = calculate_max_pv_cap_per_building(
        valid_buildings_gdf,
        last_scenario_gdf,
        PV_CAP_PER_SQ_M,
        ROOF_FACTOR,
    )

    mastr_gdf = calculate_building_load_factor(
        mastr_gdf,
        buildings_gdf,
    )

    # Empirical distributions of plant properties per capacity range.
    probabilities_dict = probabilities(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
    )

    cap_share_dict = cap_share_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
    )

    load_factor_dict = mean_load_factor_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
    )

    # NOTE: UPPER_QUNATILE is the (typo'd) name of the project constant.
    building_area_range_dict = building_area_range_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
        min_building_size=MIN_BUILDING_SIZE,
        upper_quantile=UPPER_QUNATILE,
        lower_quantile=LOWER_QUANTILE,
    )

    allocated_buildings_gdf = desaggregate_pv(
        buildings_gdf=buildings_gdf,
        cap_df=cap_per_bus_id_df,
        prob_dict=probabilities_dict,
        cap_share_dict=cap_share_dict,
        building_area_range_dict=building_area_range_dict,
        load_factor_dict=load_factor_dict,
        seed=SEED,
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
    )

    allocated_buildings_gdf = allocated_buildings_gdf.assign(scenario=scenario)

    # Fill missing plant metadata from the empirical distributions and
    # coerce columns to numeric dtypes where possible.
    meta_buildings_gdf = frame_to_numeric(
        add_buildings_meta_data(
            allocated_buildings_gdf,
            probabilities_dict,
            SEED,
        )
    )

    # New generators get start-up dates between the newest preceding plant
    # and the scenario timestamp.
    return (
        add_start_up_date(
            meta_buildings_gdf,
            start=last_scenario_gdf.start_up_date.max(),
            end=SCENARIO_TIMESTAMP[scenario],
            seed=SEED,
        ),
        cap_per_bus_id_df,
    )
2584
2585
2586
class EgonPowerPlantPvRoofBuildingScenario(Base):
    """ORM model mapping PV rooftop units to buildings per scenario."""

    __tablename__ = "egon_power_plants_pv_roof_building"
    __table_args__ = {"schema": "supply"}

    index = Column(Integer, primary_key=True, index=True)
    # Scenario identifier, e.g. "status_quo" or an eGon target scenario.
    scenario = Column(String)
    # MV grid district (bus) the building belongs to.
    bus_id = Column(Integer, nullable=True)
    # OSM/synthetic building the generator is allocated to.
    building_id = Column(Integer)
    # MaStR unit identifier ("EinheitMastrNummer"); NULL for generators
    # created synthetically during desaggregation.
    gens_id = Column(String, nullable=True)
    # Installed capacity in MW (converted from kW on export).
    capacity = Column(Float)
    # MaStR orientation/tilt attributes — presumably carried over verbatim
    # from the MaStR source data; verify against the export columns.
    einheitliche_ausrichtung_und_neigungswinkel = Column(Float)
    hauptausrichtung = Column(String)
    hauptausrichtung_neigungswinkel = Column(String)
    # Voltage level derived from capacity (1-7), see add_voltage_level().
    voltage_level = Column(Integer)
2600
2601
2602
def create_scenario_table(buildings_gdf):
    """Create mapping table pv_unit <-> building for scenario"""
    table = EgonPowerPlantPvRoofBuildingScenario.__table__

    # Recreate the table from scratch on every run.
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    export_df = (
        buildings_gdf.rename(columns=COLS_TO_RENAME)
        .assign(capacity=buildings_gdf.capacity.div(10**3))  # kW -> MW
        .loc[:, COLS_TO_EXPORT]
        .reset_index()
    )

    export_df.to_sql(
        name=table.name,
        schema=table.schema,
        con=db.engine(),
        if_exists="append",
        index=False,
    )
2620
2621
2622
def geocode_mastr_data():
    """
    Read PV rooftop data from MaStR CSV, clean it and geocode it.

    Reads the raw MaStR CSV, cleans it, geocodes the unique address
    combinations via a rate-limited geocoder and writes the result to a
    database table.

    TODO: the source will be replaced as soon as the MaStR data is available
     in DB.
    """
    mastr_df = mastr_data(
        MASTR_INDEX_COL,
        MASTR_RELEVANT_COLS,
        MASTR_DTYPES,
        MASTR_PARSE_DATES,
    )

    # Drop duplicates/implausible capacities and fill missing data
    # (see module docstring for the cleaning strategy).
    clean_mastr_df = clean_mastr_data(
        mastr_df,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
        rounding=ROUNDING,
    )

    geocoding_df = geocoding_data(clean_mastr_df)

    # Rate-limited geocoder (Nominatim requires a delay between requests).
    ratelimiter = geocoder(USER_AGENT, MIN_DELAY_SECONDS)

    geocode_gdf = geocode_data(geocoding_df, ratelimiter, EPSG)

    create_geocoded_table(geocode_gdf)
2650
2651
2652
def pv_rooftop_to_buildings():
    """Main script, executed as task"""

    mastr_gdf = load_mastr_data()

    buildings_gdf = load_building_data()

    # Match existing (status quo) MaStR generators to OSM buildings.
    desagg_mastr_gdf, desagg_buildings_gdf = allocate_to_buildings(
        mastr_gdf, buildings_gdf
    )

    all_buildings_gdf = (
        desagg_mastr_gdf.assign(scenario="status_quo")
        .reset_index()
        .rename(columns={"geometry": "geom", "EinheitMastrNummer": "gens_id"})
    )

    scenario_buildings_gdf = all_buildings_gdf.copy()

    cap_per_bus_id_df = pd.DataFrame()

    for scenario in SCENARIOS:
        logger.debug(f"Desaggregating scenario {scenario}.")
        # Each scenario builds on the result of the temporally preceding
        # one: scenario_buildings_gdf is fed back in on the next iteration.
        (
            scenario_buildings_gdf,
            cap_per_bus_id_scenario_df,
        ) = allocate_scenarios(  # noqa: F841
            desagg_mastr_gdf,
            desagg_buildings_gdf,
            scenario_buildings_gdf,
            scenario,
        )

        # Accumulate all scenarios into one frame for the final export.
        all_buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [all_buildings_gdf, scenario_buildings_gdf], ignore_index=True
            ),
            crs=scenario_buildings_gdf.crs,
            geometry="geom",
        )

        cap_per_bus_id_df = pd.concat(
            [cap_per_bus_id_df, cap_per_bus_id_scenario_df]
        )

    # export scenario
    create_scenario_table(add_voltage_level(all_buildings_gdf))
2699