synthetic_buildings()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 26
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 11
dl 0
loc 26
rs 9.85
c 0
b 0
f 0
cc 2
nop 1
"""
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate
new PV rooftop generators for scenarios eGon2035 and eGon100RE.

See documentation section :ref:`pv-rooftop-ref` for more information.

"""
9
from __future__ import annotations
10
11
from collections import Counter
12
from functools import wraps
13
from time import perf_counter
14
import datetime
15
import json
16
17
from geoalchemy2 import Geometry
18
from loguru import logger
19
from numpy.random import RandomState, default_rng
20
from omi.dialects import get_dialect
21
from pyproj.crs.crs import CRS
22
from sqlalchemy import BigInteger, Column, Float, Integer, String
23
from sqlalchemy.dialects.postgresql import HSTORE
24
from sqlalchemy.ext.declarative import declarative_base
25
import geopandas as gpd
26
import numpy as np
27
import pandas as pd
28
29
from egon.data import config, db
30
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
31
    OsmBuildingsSynthetic,
32
)
33
from egon.data.datasets.power_plants.mastr_db_classes import EgonPowerPlantsPv
34
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
35
from egon.data.datasets.scenario_parameters import get_scenario_year
36
from egon.data.datasets.zensus_vg250 import Vg250Gem
37
from egon.data.metadata import (
38
    context,
39
    contributors,
40
    generate_resource_fields_from_db_table,
41
    license_dedl,
42
    license_odbl,
43
    meta_metadata,
44
    sources,
45
)
46
47
engine = db.engine()
Base = declarative_base()
# Global random seed from the CLI/config so all random operations are
# reproducible across runs.
SEED = int(config.settings()["egon-data"]["--random-seed"])

# TODO: move to yml
# Index column used for the MaStR generator data.
MASTR_INDEX_COL = "gens_id"

EPSG = 4326  # geographic CRS used for grid districts / federal states
SRID = 3035  # projected CRS of the OSM building geometries

# data cleaning: plausible capacity bounds in MW
# (23.5 MW upper bound: Germany's largest rooftop PV, see clean_mastr_data)
MAX_REALISTIC_PV_CAP = 23500 / 10**3
MIN_REALISTIC_PV_CAP = 0.1 / 10**3

# show additional logging information
VERBOSE = False

# Number of quantiles used to bin generators and buildings by size
Q = 5

# Scenario Data
SCENARIOS = config.settings()["egon-data"]["--scenarios"]
# Cut-off timestamp per scenario (UTC) for commissioning dates.
SCENARIO_TIMESTAMP = {
    "status2019": pd.Timestamp("2020-01-01", tz="UTC"),
    "status2023": pd.Timestamp("2024-01-01", tz="UTC"),
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
}
# Assumed technical lifetime of a rooftop PV system: 20 years
# (365-day years, leap days ignored).
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")

# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
MODUL_CAP = 0.4 / 10**3  # MWp
MODUL_SIZE = 1.096 * 1.754  # m²
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE  # MWp per m² of module area

# Estimation of usable roof area
# Factor for the conversion of building area to roof area
# estimation mean roof pitch: 35°
# estimation usable roof share: 80%
# estimation that only the south side of the building is used for pv
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
# AREA_FACTOR = 1.221
# USABLE_ROOF_SHARE = 0.8
# SOUTH_SHARE = 0.5
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
ROOF_FACTOR = 0.5

# Capacity class boundaries in MW (0–30 kW, 30–100 kW, > 100 kW).
CAP_RANGES = [
    (0, 30 / 10**3),
    (30 / 10**3, 100 / 10**3),
    (100 / 10**3, float("inf")),
]

# Smallest building footprint considered, presumably m² (same unit as
# MODUL_SIZE) — TODO confirm.
MIN_BUILDING_SIZE = 10.0
# Quantile bounds used to clip outliers.
UPPER_QUANTILE = 0.95
LOWER_QUANTILE = 0.05

# Columns written to the result table.
COLS_TO_EXPORT = [
    "scenario",
    "bus_id",
    "building_id",
    "gens_id",
    "capacity",
    "orientation_uniform",
    "orientation_primary",
    "orientation_primary_angle",
    "voltage_level",
    "weather_cell_id",
]

# TODO
INCLUDE_SYNTHETIC_BUILDINGS = True
ONLY_BUILDINGS_WITH_DEMAND = True
TEST_RUN = False
122
123
124
def timer_func(func):
    """Decorator that logs the wall-clock runtime of *func* at debug level."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        started = perf_counter()
        result = func(*args, **kwargs)
        elapsed = perf_counter() - started
        logger.debug(
            f"Function {func.__name__} took {elapsed:.4f} seconds."
        )
        return result

    return wrapper
137
138
139
@timer_func
def mastr_data(
    index_col: str | int | list[str] | list[int],
) -> gpd.GeoDataFrame:
    """
    Read MaStR data from database.

    Only rooftop PV systems that are in operation ("InBetrieb") and mounted
    on buildings are selected.

    Parameters
    -----------
    index_col : str, int or list of str or int
        Column(s) to use as the row labels of the DataFrame.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data.
    """
    with db.session_scope() as session:
        query = session.query(EgonPowerPlantsPv).filter(
            EgonPowerPlantsPv.status == "InBetrieb",
            EgonPowerPlantsPv.site_type
            == ("Bauliche Anlagen (Hausdach, Gebäude und Fassade)"),
        )

    # NOTE(review): the query is executed after the session scope has
    # closed, relying on `query.session.bind` still being usable —
    # confirm this is intended (federal_state_data reads inside the scope).
    gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col=index_col
    ).drop(columns="id")

    logger.debug("MaStR data loaded.")

    return gdf
169
170
171
@timer_func
def clean_mastr_data(
    mastr_gdf: gpd.GeoDataFrame,
    max_realistic_pv_cap: int | float,
    min_realistic_pv_cap: int | float,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Clean the MaStR data from implausible data.

    * Drop MaStR ID duplicates.
    * Drop generators with implausible capacities.
    * Fill missing commissioning dates by sampling uniformly from the
      observed ones.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data.
    max_realistic_pv_cap : int or float
        Maximum capacity, which is considered to be realistic.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing cleaned MaStR data.
    """
    init_len = len(mastr_gdf)

    # drop duplicates
    mastr_gdf = mastr_gdf.loc[~mastr_gdf.index.duplicated()]

    # drop generators without any capacity info
    # and capacity of zero
    # and if the capacity is > 23.5 MW, because
    # Germanies largest rooftop PV is 23 MW
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
    mastr_gdf = mastr_gdf.loc[
        ~mastr_gdf.capacity.isna()
        & (mastr_gdf.capacity <= max_realistic_pv_cap)
        & (mastr_gdf.capacity > min_realistic_pv_cap)
    ]

    # get consistent start-up date
    # randomly fill missing start-up dates from the pool of known ones
    # (uniform sampling — the pool's date distribution is preserved only
    # insofar as frequent dates appear more often in the pool)
    pool = mastr_gdf.loc[
        ~mastr_gdf.commissioning_date.isna()
    ].commissioning_date.to_numpy()

    size = len(mastr_gdf) - len(pool)

    # Guard against an empty pool (nothing to sample from).
    if size > 0 and len(pool) > 0:
        rng = default_rng(seed=seed)

        # Sample without replacement when the pool is large enough;
        # fall back to replacement otherwise (an unconditional
        # replace=False would raise when size > len(pool)).
        choice = rng.choice(
            pool,
            size=size,
            replace=len(pool) < size,
        )

        # Assign only the commissioning_date column. The previous
        # whole-row assignment (.loc[mask] = frame) could silently
        # up-cast the dtypes of unrelated columns.
        mastr_gdf.loc[
            mastr_gdf.commissioning_date.isna(), "commissioning_date"
        ] = choice

        logger.info(
            f"Randomly added start-up dates to {size} generators."
        )

    # Normalize to timezone-aware (UTC) datetimes.
    mastr_gdf = mastr_gdf.assign(
        commissioning_date=pd.to_datetime(
            mastr_gdf.commissioning_date, utc=True
        )
    )

    end_len = len(mastr_gdf)
    logger.debug(
        f"Dropped {init_len - end_len} "
        f"({((init_len - end_len) / init_len) * 100:g}%)"
        f" of {init_len} rows from MaStR DataFrame."
    )

    return mastr_gdf
254
255
256
@timer_func
def municipality_data() -> gpd.GeoDataFrame:
    """
    Get municipality data from eGo^n Database.

    Only the municipality key (AGS) and the geometry are read; the result
    is indexed by AGS.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    """
    with db.session_scope() as session:
        query = session.query(Vg250Gem.ags, Vg250Gem.geometry.label("geom"))

    # NOTE(review): executed after the session scope closed — relies on the
    # engine-level bind, mirroring the other loaders in this module.
    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="ags"
    )
271
272
273
@timer_func
def add_ags_to_gens(
    mastr_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to generators.

    Each generator is spatially joined with the municipality it intersects;
    the municipality index ("index_right") is renamed to "ags".

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data with AGS ID added.
    """
    joined_gdf = mastr_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    return joined_gdf.rename(columns={"index_right": "ags"})
298
299
300
def drop_gens_outside_muns(
    mastr_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all generators outside of municipalities.

    Generators that did not match a municipality in the spatial join
    (i.e. have no AGS ID) are removed.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data with generators
        without an AGS ID dropped.
    """
    kept_gdf = mastr_gdf.loc[~mastr_gdf.ags.isna()]

    total = len(mastr_gdf)
    dropped = total - len(kept_gdf)

    logger.debug(
        f"{dropped} ("
        f"{dropped / total * 100:g}%)"
        f" of {total} values are outside of the municipalities"
        " and are therefore dropped."
    )

    return kept_gdf
326
327
328
def load_mastr_data():
    """Read PV rooftop data from MaStR CSV
    Note: the source will be replaced as soon as the MaStR data is available
    in DB.

    Loads, cleans and geocodes the MaStR PV rooftop generators and drops
    all generators that lie outside any municipality.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    """
    raw_gdf = mastr_data(MASTR_INDEX_COL)

    cleaned_gdf = clean_mastr_data(
        raw_gdf,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
    )

    municipalities_gdf = municipality_data()

    with_ags_gdf = add_ags_to_gens(cleaned_gdf, municipalities_gdf)

    return drop_gens_outside_muns(with_ags_gdf)
354
355
356
class OsmBuildingsFiltered(Base):
    """
    Class definition of table openstreetmap.osm_buildings_filtered.

    ORM mapping for the filtered OSM buildings used as allocation targets.
    """

    __tablename__ = "osm_buildings_filtered"
    __table_args__ = {"schema": "openstreetmap"}

    # OSM object ID (not the primary key of this table)
    osm_id = Column(BigInteger)
    amenity = Column(String)
    building = Column(String)
    name = Column(String)
    # building geometry (SRID 3035)
    geom = Column(Geometry(srid=SRID), index=True)
    # building area — presumably m²; confirm upstream
    area = Column(Float)
    # point representation; used as the geometry by osm_buildings()
    geom_point = Column(Geometry(srid=SRID), index=True)
    # raw OSM tags as key/value store
    tags = Column(HSTORE)
    id = Column(BigInteger, primary_key=True, index=True)
374
375
376
@timer_func
def osm_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read OSM buildings data from eGo^n Database.

    Only the ID, area and point geometry are read; geometries are
    reprojected to *to_crs*.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    """
    with db.session_scope() as session:
        # The point geometry (geom_point) is used as the geometry column.
        query = session.query(
            OsmBuildingsFiltered.id,
            OsmBuildingsFiltered.area,
            OsmBuildingsFiltered.geom_point.label("geom"),
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
402
403
404
@timer_func
def synthetic_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read synthetic buildings data from eGo^n Database.

    Same column layout as :func:`osm_buildings` so both frames can be
    concatenated.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing synthetic buildings data.
    """
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsSynthetic.id,
            OsmBuildingsSynthetic.area,
            OsmBuildingsSynthetic.geom_point.label("geom"),
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
430
431
432
@timer_func
def add_ags_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to buildings.

    Each building is spatially joined with the municipality it intersects;
    the municipality index ("index_right") is renamed to "ags".

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with AGS ID added.
    """
    return (
        buildings_gdf
        .sjoin(municipalities_gdf, how="left", predicate="intersects")
        .rename(columns={"index_right": "ags"})
    )
457
458
459
def drop_buildings_outside_muns(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of municipalities.

    Buildings that did not match a municipality in the spatial join
    (i.e. have no AGS ID) are removed.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without
        an AGS ID dropped.
    """
    kept_gdf = buildings_gdf.loc[~buildings_gdf.ags.isna()]

    total = len(buildings_gdf)
    dropped = total - len(kept_gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the municipalities "
        "and are therefore dropped."
    )

    return kept_gdf
485
486
487
def egon_building_peak_loads():
    """
    Get IDs of buildings with an electricity peak load.

    The active scenario closest to today is used to filter the peak-load
    table.

    Returns
    -------
    numpy.ndarray
        Sorted unique building IDs as integers.
    """
    # use active scenario which is closest to today
    # (min() with a key is O(n); the previous sorted(...)[0] built a full
    # sorted list only to take its first element)
    scenario = min(SCENARIOS, key=get_scenario_year)

    # scenario comes from the project configuration, not user input, so
    # inlining it into the SQL string is acceptable here.
    sql = f"""
    SELECT building_id
    FROM demand.egon_building_electricity_peak_loads
    WHERE scenario = '{scenario}'
    """

    return (
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
    )
501
502
503
@timer_func
def load_building_data():
    """
    Read buildings from DB
    Tables:

    * `openstreetmap.osm_buildings_filtered` (from OSM)
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)

    Use column `id` for both as it is unique hence you concat both datasets.
    If INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be
    loaded. If ONLY_BUILDINGS_WITH_DEMAND is True the result is restricted
    to buildings that have an electricity peak load. Buildings outside of
    municipalities or grid districts are dropped and the bus_id is taken
    from boundaries.egon_map_zensus_mvgd_buildings.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    """

    municipalities_gdf = municipality_data()

    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)

    if INCLUDE_SYNTHETIC_BUILDINGS:
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)

        # Concatenate OSM and synthetic buildings; both use the unique
        # `id` index and the same column layout.
        buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [
                    osm_buildings_gdf,
                    synthetic_buildings_gdf,
                ]
            ),
            geometry="geom",
            crs=osm_buildings_gdf.crs,
        ).rename(columns={"area": "building_area"})

        buildings_gdf.index = buildings_gdf.index.astype(int)

    else:
        buildings_gdf = osm_buildings_gdf.rename(
            columns={"area": "building_area"}
        )

    if ONLY_BUILDINGS_WITH_DEMAND:
        building_ids = egon_building_peak_loads()

        init_len = len(building_ids)

        # Keep only buildings that both exist and have a peak load.
        building_ids = np.intersect1d(
            list(map(int, building_ids)),
            list(map(int, buildings_gdf.index.to_numpy())),
        )

        end_len = len(building_ids)

        logger.debug(
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) "
            f"of buildings have peak load."
        )

        buildings_gdf = buildings_gdf.loc[building_ids]

    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)

    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)

    grid_districts_gdf = grid_districts(EPSG)

    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)

    grid_federal_state_gdf = overlay_grid_districts_with_counties(
        grid_districts_gdf,
        federal_state_gdf,
    )

    buildings_overlay_gdf = add_overlay_id_to_buildings(
        buildings_ags_gdf,
        grid_federal_state_gdf,
    )

    logger.debug("Loaded buildings.")

    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)

    # overwrite bus_id with data from new table
    sql = (
        "SELECT building_id, bus_id FROM "
        "boundaries.egon_map_zensus_mvgd_buildings"
    )
    map_building_bus_df = db.select_dataframe(sql)

    # Restrict to buildings present in the mapping table.
    building_ids = np.intersect1d(
        list(map(int, map_building_bus_df.building_id.unique())),
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
    )

    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]

    # Replace the bus_id from the overlay with the mapped one and restore
    # the `id` index.
    gdf = (
        buildings_within_gdf.reset_index()
        .drop(columns=["bus_id"])
        .merge(
            how="left",
            right=map_building_bus_df,
            left_on="id",
            right_on="building_id",
        )
        .drop(columns=["building_id"])
        .set_index("id")
        .sort_index()
    )

    # The merge can duplicate index entries; keep the first occurrence.
    return gdf[~gdf.index.duplicated(keep="first")]
618
619
620
@timer_func
def sort_and_qcut_df(
    df: pd.DataFrame | gpd.GeoDataFrame,
    col: str,
    q: int,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Determine the quantile of a given attribute in a (Geo)DataFrame.

    The frame is sorted ascending on *col* and a new column ``quant``
    holds the quantile bin (0 .. q-1) of *col* for each row.

    Parameters
    -----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        (Geo)DataFrame to sort and qcut.
    col : str
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
    q : int
        Number of quantiles.
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Sorted and qcut (Geo)DataFrame.
    """
    ordered = df.sort_values(col, ascending=True)

    bins = pd.qcut(ordered[col], q=q, labels=range(q))

    return ordered.assign(quant=bins)
652
653
654
@timer_func
def allocate_pv(
    q_mastr_gdf: gpd.GeoDataFrame,
    q_buildings_gdf: gpd.GeoDataFrame,
    seed: int,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate the MaStR pv generators to the OSM buildings.
    This will determine a building for each pv generator if there are more
    buildings than generators within a given AGS. Primarily generators are
    distributed with the same quantile as the buildings. Multiple assignment
    is excluded.

    Parameters
    -----------
    q_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded and qcut MaStR data.
    q_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing qcut OSM buildings data.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    rng = default_rng(seed=seed)

    # Initialize the cross-reference columns and sort both frames the same
    # way so per-AGS/per-quantile slices line up.
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan).sort_values(
        by=["ags", "quant"]
    )
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan).sort_values(
        by=["ags", "quant"]
    )

    ags_list = q_buildings_gdf.ags.unique()

    # Optionally restrict to a subset of municipalities for quick testing.
    if TEST_RUN:
        ags_list = ags_list[:250]

    num_ags = len(ags_list)

    t0 = perf_counter()

    for count, ags in enumerate(ags_list):
        # All buildings and generators within this municipality.
        buildings = q_buildings_gdf.loc[q_buildings_gdf.ags == ags]
        gens = q_mastr_gdf.loc[q_mastr_gdf.ags == ags]

        len_build = len(buildings)
        len_gens = len(gens)

        # More generators than buildings: randomly drop surplus generators
        # so every generator can get its own building.
        if len_build < len_gens:
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
            logger.error(
                f"There are {len_gens} generators and only {len_build}"
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
                "generators were truncated to match the amount of buildings."
            )

            assert len_build == len(gens)

        for quant in gens.quant.unique():
            # Buildings and generators of the same size quantile.
            q_buildings = buildings.loc[buildings.quant == quant]
            q_gens = gens.loc[gens.quant == quant]

            len_build = len(q_buildings)
            len_gens = len(q_gens)

            if len_build < len_gens:
                # Not enough buildings in this quantile: borrow random,
                # still-unassigned buildings from other quantiles of the
                # same AGS.
                delta = len_gens - len_build

                logger.warning(
                    f"There are {len_gens} generators and only {len_build} "
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
                    f"buildings from AGS {ags} will be added randomly."
                )

                add_buildings = pd.Index(
                    rng.choice(
                        list(set(buildings.index) - set(q_buildings.index)),
                        size=delta,
                        replace=False,
                    )
                )

                chosen_buildings = q_buildings.index.union(add_buildings)

            else:
                # Enough buildings: pick len_gens of them at random.
                chosen_buildings = rng.choice(
                    q_buildings.index,
                    size=len_gens,
                    replace=False,
                )

            # NOTE(review): this assigns generator IDs to buildings by
            # positional order of chosen_buildings vs. q_gens.index —
            # the pairing within a quantile is arbitrary by design.
            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index
            # Remove used buildings so they cannot be assigned twice.
            buildings = buildings.drop(chosen_buildings)

        # Periodic progress logging.
        if count % 500 == 0:
            logger.debug(
                f"Allocation of {count / num_ags * 100:g} % of AGS done. "
                f"It took {perf_counter() - t0:g} seconds."
            )

            t0 = perf_counter()

    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]

    # Each generator must be assigned to at most one building.
    assert len(assigned_buildings) == len(assigned_buildings.gens_id.unique())

    # Write the reverse mapping (generator -> building).
    q_mastr_gdf.loc[assigned_buildings.gens_id, "building_id"] = (
        assigned_buildings.index
    )

    assigned_gens = q_mastr_gdf.loc[~q_mastr_gdf.building_id.isna()]

    assert len(assigned_buildings) == len(assigned_gens)

    logger.debug("Allocated status quo generators to buildings.")

    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
775
776
777
def frame_to_numeric(
778
    df: pd.DataFrame | gpd.GeoDataFrame,
779
) -> pd.DataFrame | gpd.GeoDataFrame:
780
    """
781
    Try to convert all columns of a DataFrame to numeric ignoring errors.
782
783
    Parameters
784
    ----------
785
    df : pandas.DataFrame or geopandas.GeoDataFrame
786
    Returns
787
    -------
788
    pandas.DataFrame or geopandas.GeoDataFrame
789
    """
790
    if str(df.index.dtype) == "object":
791
        df.index = pd.to_numeric(df.index, errors="ignore")
792
793
    for col in df.columns:
794
        if str(df[col].dtype) == "object":
795
            df[col] = pd.to_numeric(df[col], errors="ignore")
796
797
    return df
798
799
800
def validate_output(
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
) -> None:
    """
    Validate output.

    * Validate that there are exactly as many buildings with a pv system as
      there are pv systems with a building
    * Validate that the building IDs with a pv system are the same building
      IDs as assigned to the pv systems
    * Validate that the pv system IDs with a building are the same pv system
      IDs as assigned to the buildings

    NOTE: these checks use ``assert`` and are therefore skipped when Python
    runs with the ``-O`` flag.

    Parameters
    -----------
    desagg_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    desagg_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    # Same number of allocated generators and allocated buildings.
    assert len(
        desagg_mastr_gdf.loc[~desagg_mastr_gdf.building_id.isna()]
    ) == len(desagg_buildings_gdf.loc[~desagg_buildings_gdf.gens_id.isna()])
    # Building IDs referenced by generators == building IDs of allocated
    # buildings (compared as sorted unique arrays).
    assert (
        np.sort(
            desagg_mastr_gdf.loc[
                ~desagg_mastr_gdf.building_id.isna()
            ].building_id.unique()
        )
        == np.sort(
            desagg_buildings_gdf.loc[
                ~desagg_buildings_gdf.gens_id.isna()
            ].index.unique()
        )
    ).all()
    # Generator IDs of allocated generators == generator IDs referenced by
    # buildings (compared as sorted unique arrays).
    assert (
        np.sort(
            desagg_mastr_gdf.loc[
                ~desagg_mastr_gdf.building_id.isna()
            ].index.unique()
        )
        == np.sort(
            desagg_buildings_gdf.loc[
                ~desagg_buildings_gdf.gens_id.isna()
            ].gens_id.unique()
        )
    ).all()

    logger.debug("Validated output.")
850
851
852
def drop_unallocated_gens(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop generators which did not get allocated.

    Generators without a building ID after the allocation step are removed.

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with generators dropped which did
        not get allocated.
    """
    total = len(gdf)

    allocated_gdf = gdf.loc[~gdf.building_id.isna()]

    dropped = total - len(allocated_gdf)

    logger.debug(
        f"Dropped {dropped} "
        f"({dropped / total * 100:g}%)"
        f" of {total} unallocated rows from MaStR DataFrame."
    )

    return allocated_gdf
879
880
881
@timer_func
def allocate_to_buildings(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate status quo pv rooftop generators to buildings.

    Both frames are binned into Q size quantiles, matched per AGS and
    quantile, validated, and unallocated generators are dropped.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    logger.debug("Starting allocation of status quo.")

    binned_gens_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
    binned_buildings_gdf = sort_and_qcut_df(
        buildings_gdf, col="building_area", q=Q
    )

    alloc_gens_gdf, alloc_buildings_gdf = allocate_pv(
        binned_gens_gdf, binned_buildings_gdf, SEED
    )

    validate_output(alloc_gens_gdf, alloc_buildings_gdf)

    return drop_unallocated_gens(alloc_gens_gdf), alloc_buildings_gdf
914
915
916
@timer_func
def grid_districts(
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Load mv grid district geo data from eGo^n Database as
    geopandas.GeoDataFrame.

    Parameters
    -----------
    epsg : int
        EPSG ID to use as CRS.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data,
        indexed by bus_id.
    """
    gdf = db.select_geodataframe(
        """
        SELECT bus_id, geom
        FROM grid.egon_mv_grid_district
        ORDER BY bus_id
        """,
        index_col="bus_id",
        geom_col="geom",
        epsg=epsg,
    )

    # Ensure an integer index for later intersect1d/loc operations.
    gdf.index = gdf.index.astype(int)

    logger.debug("Grid districts loaded.")

    return gdf
949
950
951
def scenario_data(
    carrier: str = "solar_rooftop",
    scenario: str = "eGon2035",
) -> pd.DataFrame:
    """
    Get scenario capacity data from eGo^n Database.

    Parameters
    -----------
    carrier : str
        Carrier type to filter table by.
    scenario : str
        Scenario to filter table by.
    Returns
    -------
    pandas.DataFrame
        DataFrame with scenario capacity data in GW.
    """
    with db.session_scope() as session:
        query = session.query(EgonScenarioCapacities).filter(
            EgonScenarioCapacities.carrier == carrier,
            EgonScenarioCapacities.scenario_name == scenario,
        )

    # NOTE(review): executed after the session scope closed — relies on
    # the engine-level bind, mirroring the other loaders in this module.
    df = pd.read_sql(
        query.statement, query.session.bind, index_col="index"
    ).sort_index()

    logger.debug("Scenario capacity data loaded.")

    return df
982
983
984 View Code Duplication
class Vg250Lan(Base):
    """
    Class definition of table boundaries.vg250_lan.

    ORM mapping for the VG250 federal state (Länder) boundaries.
    """

    __tablename__ = "vg250_lan"
    __table_args__ = {"schema": "boundaries"}

    id = Column(BigInteger, primary_key=True, index=True)
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    # regional keys (ARS/AGS) as used by the German official gazetteer
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    # NUTS region code (used by federal_state_data())
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    # federal state geometry (EPSG 4326)
    geometry = Column(Geometry(srid=EPSG), index=True)
1021
1022
1023
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
    """
    Get federal state data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with federal state data (indexed by id, with NUTS
        code and geometry), reprojected to ``to_crs``.
    """
    with db.session_scope() as session:
        query = session.query(
            Vg250Lan.id, Vg250Lan.nuts, Vg250Lan.geometry.label("geom")
        )

        # Read inside the session scope so the connection is still open.
        gdf = gpd.read_postgis(
            query.statement, session.connection(), index_col="id"
        ).to_crs(to_crs)

    logger.debug("Federal State data loaded.")

    return gdf
1048
1049
1050
@timer_func
def overlay_grid_districts_with_counties(
    mv_grid_district_gdf: gpd.GeoDataFrame,
    federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Calculate the intersections of mv grid districts and counties.

    Parameters
    -----------
    mv_grid_district_gdf : gpd.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    federal_state_gdf : gpd.GeoDataFrame
        GeoDataFrame with federal state data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with one row per (state, grid district) intersection.
    """
    logger.debug(
        "Calculating intersection overlay between mv grid districts and "
        "counties. This may take a while..."
    )

    # Bring both layers into the same CRS before intersecting.
    states_gdf = federal_state_gdf.to_crs(mv_grid_district_gdf.crs)
    districts_gdf = mv_grid_district_gdf.reset_index()

    intersection_gdf = gpd.overlay(
        states_gdf,
        districts_gdf,
        how="intersection",
        keep_geom_type=True,
    )

    logger.debug("Done!")

    return intersection_gdf
1084
1085
1086
@timer_func
def add_overlay_id_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    grid_federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about overlay ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    grid_federal_state_gdf : geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between counties and grid
        districts.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with overlay ID added.
    """
    # Reproject buildings, spatially join them to the overlay shapes and
    # expose the join index as 'overlay_id'.
    reprojected_gdf = buildings_gdf.to_crs(grid_federal_state_gdf.crs)

    joined_gdf = reprojected_gdf.sjoin(
        grid_federal_state_gdf,
        how="left",
        predicate="intersects",
    )

    gdf = joined_gdf.rename(columns={"index_right": "overlay_id"})

    logger.debug("Added overlay ID to OSM buildings.")

    return gdf
1119
1120
1121
def drop_buildings_outside_grids(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of grid areas.

    Buildings without a bus ID were not matched to any grid district and
    are removed.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    gepandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without an bus ID dropped.
    """
    total_count = len(buildings_gdf)

    inside_gdf = buildings_gdf.loc[buildings_gdf.bus_id.notna()]

    dropped_count = total_count - len(inside_gdf)

    logger.debug(
        f"{dropped_count} "
        f"({dropped_count / total_count * 100:g}%) "
        f"of {total_count} values are outside of the grid areas "
        "and are therefore dropped."
    )

    return inside_gdf
1147
1148
1149
def cap_per_bus_id(
    scenario: str,
) -> pd.DataFrame:
    """
    Get table with total pv rooftop capacity per grid district.

    Parameters
    -----------
    scenario : str
        Scenario name.
    Returns
    -------
    pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid, indexed by
        bus_id.
    """
    # NOTE: `scenario` comes from internal dataset configuration, not user
    # input, so f-string interpolation into SQL is tolerated here.
    if "status" in scenario:
        # Status-quo scenarios: aggregate existing rooftop power plants.
        # Renamed from `sources` to avoid shadowing the `sources` helper
        # imported from egon.data.metadata at module level.
        sources_cfg = config.datasets()["solar_rooftop"]["sources"]

        sql = f"""
        SELECT bus_id, SUM(el_capacity) as capacity
        FROM {sources_cfg['power_plants']['schema']}.{sources_cfg['power_plants']['table']}
        WHERE carrier = 'solar_rooftop'
        AND scenario = '{scenario}'
        GROUP BY bus_id
        """

        df = db.select_dataframe(sql, index_col="bus_id")

    else:
        # Future scenarios: read the allocated generator capacities.
        targets = config.datasets()["solar_rooftop"]["targets"]

        sql = f"""
        SELECT bus as bus_id, control, p_nom as capacity
        FROM {targets['generators']['schema']}.{targets['generators']['table']}
        WHERE carrier = 'solar_rooftop'
        AND scn_name = '{scenario}'
        """

        df = db.select_dataframe(sql, index_col="bus_id")
        # Slack generators balance the grid; they carry no real PV capacity.
        df = df.loc[df.control != "Slack"]

    return df
1191
1192
1193
def determine_end_of_life_gens(
    mastr_gdf: gpd.GeoDataFrame,
    scenario_timestamp: pd.Timestamp,
    pv_rooftop_lifetime: pd.Timedelta,
) -> gpd.GeoDataFrame:
    """
    Determine if an old PV system has reached its end of life.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    scenario_timestamp : pandas.Timestamp
        Timestamp at which the scenario takes place.
    pv_rooftop_lifetime : pandas.Timedelta
        Average expected lifetime of PV rooftop systems.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data and info if the system
        has reached its end of life.
    """
    total_cap_before = mastr_gdf.capacity.sum()

    # Age of each system at the scenario timestamp; systems older than the
    # expected lifetime are flagged as end-of-life.
    system_age = scenario_timestamp - mastr_gdf.commissioning_date

    mastr_gdf = mastr_gdf.assign(
        age=system_age,
        end_of_life=pv_rooftop_lifetime < system_age,
    )

    active_cap = mastr_gdf.loc[~mastr_gdf.end_of_life].capacity.sum()

    logger.debug(
        f"Determined if pv rooftop systems reached their end of life.\nTotal "
        f"capacity: {total_cap_before}\nActive capacity: {active_cap}"
    )

    return mastr_gdf
1233
1234
1235
def calculate_max_pv_cap_per_building(
    buildings_gdf: gpd.GeoDataFrame,
    mastr_gdf: gpd.GeoDataFrame,
    pv_cap_per_sq_m: float | int,
    roof_factor: float | int,
) -> gpd.GeoDataFrame:
    """
    Calculate the estimated maximum possible PV capacity per building.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    roof_factor : float, int
        Average for PV usable roof area share.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with estimated maximum PV
        capacity.
    """
    mastr_cols = [
        "capacity",
        "end_of_life",
        "building_id",
        "orientation_uniform",
        "orientation_primary",
        "orientation_primary_angle",
    ]

    # Attach existing MaStR systems to their buildings via the building ID.
    merged_gdf = buildings_gdf.reset_index().rename(columns={"index": "id"})
    merged_gdf = merged_gdf.merge(
        mastr_gdf[mastr_cols],
        how="left",
        left_on="id",
        right_on="building_id",
    )
    merged_gdf = merged_gdf.set_index("id").drop(columns="building_id")

    usable_cap_per_sq_m = roof_factor * pv_cap_per_sq_m

    # Buildings without a matched system get end_of_life=True, i.e. they
    # are treated as free roofs for new allocation.
    return merged_gdf.assign(
        max_cap=merged_gdf.building_area.multiply(usable_cap_per_sq_m),
        end_of_life=merged_gdf.end_of_life.fillna(True).astype(bool),
        bus_id=merged_gdf.bus_id.astype(int),
    )
1287
1288
1289
def calculate_building_load_factor(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
    rounding: int = 4,
) -> gpd.GeoDataFrame:
    """
    Calculate the roof load factor from existing PV systems.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    rounding : int
        Rounding to use for load factor.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data with calculated load
        factor.
    """
    # Only buildings with a known maximum capacity can yield a load factor.
    known_cap_df = (
        buildings_gdf[["max_cap", "building_area"]]
        .loc[buildings_gdf["max_cap"].notna()]
        .reset_index()
    )

    merged_gdf = mastr_gdf.merge(
        known_cap_df,
        how="left",
        left_on="building_id",
        right_on="id",
    ).set_index("id")

    # Load factor = installed capacity relative to the roof's potential.
    load_factor = (merged_gdf.capacity / merged_gdf.max_cap).round(rounding)

    return merged_gdf.assign(load_factor=load_factor)
1321
1322
1323
def get_probability_for_property(
    mastr_gdf: gpd.GeoDataFrame,
    cap_range: tuple[int | float, int | float],
    prop: str,
) -> tuple[np.array, np.array]:
    """
    Calculate the probability of the different options of a property of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_range : tuple(int, int)
        Capacity range of PV plants to look at.
    prop : str
        Property to calculate probabilities for. String needs to be in columns
        of mastr_gdf.
    Returns
    -------
    tuple
        numpy.array
            Unique values of property.
        numpy.array
            Probabilties per unique value.
    """
    low, high = cap_range

    in_range_gdf = mastr_gdf.loc[
        (mastr_gdf.capacity > low) & (mastr_gdf.capacity <= high)
    ]

    # Load factors above 1 are implausible and excluded from the statistic.
    if prop == "load_factor":
        in_range_gdf = in_range_gdf.loc[in_range_gdf[prop] <= 1]

    prop_series = in_range_gdf[prop]
    valid_mask = prop_series.notna() & (prop_series != "None")

    # Counter keeps first-appearance order of the property values.
    value_counts = Counter(prop_series.loc[valid_mask])

    values = np.array(list(value_counts.keys()))
    probabilities = np.fromiter(value_counts.values(), dtype=float)

    return values, probabilities / np.sum(probabilities)
1370
1371
1372
@timer_func
def probabilities(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    properties: list[str] | None = None,
) -> dict:
    """
    Calculate the probability of the different options of properties of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    properties : list(str)
        List of properties to calculate probabilities for. Strings need to be
        in columns of mastr_gdf.
    Returns
    -------
    dict
        Dictionary with values and probabilities per capacity range.
    """
    # Default capacity ranges in MW (0-30 kW, 30-100 kW, >100 kW).
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]
    if properties is None:
        properties = [
            "orientation_uniform",
            "orientation_primary",
            "orientation_primary_angle",
            "load_factor",
        ]

    prob_dict = {}

    for cap_range in cap_ranges:
        values_by_prop = {}
        probabilities_by_prop = {}

        for prop in properties:
            values, probs = get_probability_for_property(
                mastr_gdf,
                cap_range,
                prop,
            )
            values_by_prop[prop] = values
            probabilities_by_prop[prop] = probs

        prob_dict[cap_range] = {
            "values": values_by_prop,
            "probabilities": probabilities_by_prop,
        }

    return prob_dict
1430
1431
1432
def cap_share_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the share of PV capacity from the total PV capacity within
    capacity ranges.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    """
    # Default capacity ranges in MW (0-30 kW, 30-100 kW, >100 kW).
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    total_cap = mastr_gdf.capacity.sum()

    cap_share_dict = {}

    for cap_range in cap_ranges:
        low, high = cap_range
        in_range = (mastr_gdf.capacity > low) & (mastr_gdf.capacity <= high)

        cap_share_dict[cap_range] = (
            mastr_gdf.loc[in_range].capacity.sum() / total_cap
        )

    return cap_share_dict
1476
1477
1478
def mean_load_factor_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the mean roof load factor per capacity range from existing PV
    plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with mean roof load factor per capacity range.
    """
    # Default capacity ranges in MW (0-30 kW, 30-100 kW, >100 kW).
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    # Load factors above 1 are implausible; exclude them once up front
    # (equivalent to filtering inside every range).
    plausible_gdf = mastr_gdf.loc[mastr_gdf.load_factor <= 1]

    load_factor_dict = {}

    for cap_range in cap_ranges:
        low, high = cap_range

        in_range_gdf = plausible_gdf.loc[
            (plausible_gdf.capacity > low) & (plausible_gdf.capacity <= high)
        ]

        load_factor_dict[cap_range] = in_range_gdf.load_factor.mean()

    return load_factor_dict
1517
1518
1519
def building_area_range_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    min_building_size: int | float = 10.0,
    upper_quantile: float = 0.95,
    lower_quantile: float = 0.05,
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
    """
    Estimate normal building area range per capacity range.

    First derives a raw area range per capacity range from the observed
    building-area quantiles, then smooths neighbouring range borders by
    averaging them so the ranges tile the area axis without gaps.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    min_building_size : int, float
        Minimal building size to consider for PV plants.
    upper_quantile : float
        Upper quantile to estimate maximum building size per capacity range.
    lower_quantile : float
        Lower quantile to estimate minimum building size per capacity range.
    Returns
    -------
    dict
        Dictionary with estimated normal building area range per capacity
        range.
    """
    # Default capacity ranges in MW (0-30 kW, 30-100 kW, >100 kW).
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    n_ranges = len(cap_ranges)

    # Step 1: raw area bounds per capacity range from observed quantiles.
    raw_ranges = {}

    for idx, cap_range in enumerate(cap_ranges):
        low, high = cap_range

        in_range_gdf = mastr_gdf.loc[
            (mastr_gdf.capacity > low) & (mastr_gdf.capacity <= high)
        ]

        if idx == 0:
            bounds = (
                min_building_size,
                in_range_gdf.building_area.quantile(upper_quantile),
            )
        elif idx == n_ranges - 1:
            bounds = (
                in_range_gdf.building_area.quantile(lower_quantile),
                float("inf"),
            )
        else:
            bounds = (
                in_range_gdf.building_area.quantile(lower_quantile),
                in_range_gdf.building_area.quantile(upper_quantile),
            )

        raw_ranges[cap_range] = bounds

    raw_bounds = list(raw_ranges.values())

    # Step 2: smooth borders between neighbouring ranges by averaging.
    smoothed_ranges = {}

    for idx, (cap_range, (min_area, max_area)) in enumerate(
        raw_ranges.items()
    ):
        if idx == 0:
            smoothed_ranges[cap_range] = (
                min_area,
                np.mean((raw_bounds[idx + 1][0], max_area)),
            )
        elif idx == n_ranges - 1:
            smoothed_ranges[cap_range] = (
                np.mean((raw_bounds[idx - 1][1], min_area)),
                max_area,
            )
        else:
            smoothed_ranges[cap_range] = (
                np.mean((raw_bounds[idx - 1][1], min_area)),
                np.mean((raw_bounds[idx + 1][0], max_area)),
            )

    return smoothed_ranges
1607
1608
1609
@timer_func
def desaggregate_pv_in_mv_grid(
    buildings_gdf: gpd.GeoDataFrame,
    pv_cap: float | int,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing buildings within the grid district.
    pv_cap : float, int
        PV capacity to desaggregate.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    bus_id = int(buildings_gdf.bus_id.iat[0])

    rng = default_rng(seed=kwargs["seed"])
    random_state = RandomState(seed=kwargs["seed"])

    results_df = pd.DataFrame(columns=buildings_gdf.columns)

    for cap_range, share in kwargs["cap_share_dict"].items():
        # Capacity to allocate within this capacity range.
        pv_cap_range = pv_cap * share

        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]

        # Candidate buildings: not yet used and with a plausible building
        # area for this capacity range.
        cap_range_buildings_gdf = buildings_gdf.loc[
            ~buildings_gdf.index.isin(results_df.index)
            & (buildings_gdf.building_area > b_area_min)
            & (buildings_gdf.building_area <= b_area_max)
        ]

        mean_load_factor = kwargs["load_factor_dict"][cap_range]
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
            load_factor=np.nan,
            capacity=np.nan,
        )

        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        if total_mean_cap == 0:
            # Fallback 1: no candidate roofs with matching area — use all
            # remaining buildings of the grid instead.
            logger.warning(
                f"There are no matching roof for capacity range {cap_range} "
                f"kW in grid {bus_id}. Using all buildings as fallback."
            )

            cap_range_buildings_gdf = buildings_gdf.loc[
                ~buildings_gdf.index.isin(results_df.index)
            ]

            if len(cap_range_buildings_gdf) == 0:
                # Fallback 2: every building is already used — allow roofs
                # to be used for more than one capacity range.
                logger.warning(
                    "There are no roofes available for capacity range "
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
                )
                cap_range_buildings_gdf = buildings_gdf.copy()

            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
                load_factor=np.nan,
                capacity=np.nan,
            )

            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        elif total_mean_cap < pv_cap_range:
            logger.warning(
                f"Average roof utilization of the roof area in grid {bus_id} "
                f"and capacity range {cap_range} kW is not sufficient. The "
                "roof utilization will be above average."
            )

        # Sample enough buildings to cover the target capacity, but at
        # least one building.
        frac = max(
            pv_cap_range / total_mean_cap,
            1 / len(cap_range_buildings_gdf),
        )

        samples_gdf = cap_range_buildings_gdf.sample(
            frac=min(1, frac),
            random_state=random_state,
        )

        cap_range_dict = kwargs["prob_dict"][cap_range]

        values_dict = cap_range_dict["values"]
        p_dict = cap_range_dict["probabilities"]

        # Draw a load factor for each sampled building from the observed
        # distribution of existing plants.
        load_factors = rng.choice(
            a=values_dict["load_factor"],
            size=len(samples_gdf),
            p=p_dict["load_factor"],
        )

        samples_gdf = samples_gdf.assign(
            load_factor=load_factors,
            capacity=(
                samples_gdf.building_area
                * load_factors
                * kwargs["pv_cap_per_sq_m"]
            ).clip(lower=0.4),
        )

        # Scale capacities so the sampled total matches the target exactly.
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()

        samples_gdf = samples_gdf.assign(
            capacity=(samples_gdf.capacity * missing_factor),
            load_factor=(samples_gdf.load_factor * missing_factor),
        )

        assert np.isclose(
            samples_gdf.capacity.sum(),
            pv_cap_range,
            rtol=1e-03,
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"

        results_df = pd.concat(
            [
                results_df,
                samples_gdf,
            ],
        )

    # Final rescaling to absorb rounding errors across capacity ranges.
    total_missing_factor = pv_cap / results_df.capacity.sum()

    results_df = results_df.assign(
        capacity=(results_df.capacity * total_missing_factor),
    )

    assert np.isclose(
        results_df.capacity.sum(),
        pv_cap,
        rtol=1e-03,
    ), f"{results_df.capacity.sum()} != {pv_cap}"

    return gpd.GeoDataFrame(
        results_df,
        # Fix: use the CRS of the input buildings. All sampled rows stem
        # from buildings_gdf (same CRS), and this avoids referencing
        # samples_gdf, which is undefined if cap_share_dict is empty.
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1772
1773
1774
@timer_func
def desaggregate_pv(
    buildings_gdf: gpd.GeoDataFrame,
    cap_df: pd.DataFrame,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    cap_df : pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    # Start with the buildings that already carry an active PV system.
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]

    building_bus_ids = set(buildings_gdf.bus_id)
    cap_bus_ids = set(cap_df.index)

    logger.debug(
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from "
        f"capacity: {len(cap_bus_ids)}"
    )

    if len(building_bus_ids) > len(cap_bus_ids):
        missing = building_bus_ids - cap_bus_ids
    else:
        missing = cap_bus_ids - building_bus_ids

    logger.debug(str(missing))

    # Only grids present in both data sets can be processed.
    bus_ids = np.intersect1d(list(building_bus_ids), list(cap_bus_ids))

    for bus_id in bus_ids:
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]

        pv_installed_gdf = buildings_grid_gdf.loc[
            ~buildings_grid_gdf.end_of_life
        ]

        pv_installed = pv_installed_gdf.capacity.sum()

        # Buildings still available for new PV systems.
        pot_buildings_gdf = buildings_grid_gdf.drop(
            index=pv_installed_gdf.index
        )

        if len(pot_buildings_gdf) == 0:
            logger.error(
                f"In grid {bus_id} there are no potential buildings to "
                f"allocate PV capacity to. The grid is skipped. This message "
                f"should only appear doing test runs with few buildings."
            )

            continue

        pv_target = cap_df.at[bus_id, "capacity"]

        logger.debug(f"pv_target: {pv_target}")

        pv_missing = pv_target - pv_installed

        if pv_missing <= 0:
            logger.warning(
                f"In grid {bus_id} there is more PV installed "
                f"({pv_installed: g} kW) in status Quo than allocated within "
                f"the scenario ({pv_target: g} kW). "
                f"No new generators are added."
            )

            continue

        if pot_buildings_gdf.max_cap.sum() < pv_missing:
            logger.error(
                f"In grid {bus_id} there is less PV potential ("
                f"{pot_buildings_gdf.max_cap.sum():g} MW) than allocated PV "
                f"capacity ({pv_missing:g} MW). The average roof utilization "
                f"will be very high."
            )

        gdf = desaggregate_pv_in_mv_grid(
            buildings_gdf=pot_buildings_gdf,
            pv_cap=pv_missing,
            **kwargs,
        )

        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
        logger.debug(
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
        )

        if not np.isclose(
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
        ):
            logger.warning(
                f"The desired capacity and actual capacity in grid {bus_id} "
                f"differ.\n"
                f"Desired cap: {pv_target}\nActual cap: "
                f"{gdf.capacity.sum() + pv_installed}"
            )

        pre_cap = allocated_buildings_gdf.capacity.sum()
        new_cap = gdf.capacity.sum()

        allocated_buildings_gdf = pd.concat(
            [
                allocated_buildings_gdf,
                gdf,
            ]
        )

        total_cap = allocated_buildings_gdf.capacity.sum()

        # Sanity check: concatenation must not change total capacity.
        assert np.isclose(pre_cap + new_cap, total_cap)

    logger.debug("Desaggregated scenario.")
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
    logger.debug(
        f"Generator capacity: " f"{allocated_buildings_gdf.capacity.sum(): g}"
    )

    return gpd.GeoDataFrame(
        allocated_buildings_gdf,
        # Fix: use the CRS of the input buildings. `gdf` is undefined when
        # every grid was skipped via `continue`; the CRS is identical since
        # all allocated rows derive from buildings_gdf.
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1925
1926
1927
@timer_func
def add_buildings_meta_data(
    buildings_gdf: gpd.GeoDataFrame,
    prob_dict: dict,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Randomly add additional metadata to desaggregated PV plants.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    rng = default_rng(seed=seed)

    buildings_gdf = buildings_gdf.reset_index().rename(
        columns={"index": "building_id"}
    )

    for (min_cap, max_cap), cap_range_prob_dict in prob_dict.items():
        in_range_mask = (buildings_gdf.capacity >= min_cap) & (
            buildings_gdf.capacity < max_cap
        )
        cap_range_gdf = buildings_gdf.loc[in_range_mask]

        for prop, choices in cap_range_prob_dict["values"].items():
            # load_factor is already set during desaggregation.
            if prop == "load_factor":
                continue

            # Rows with no value for this property yet.
            missing_mask = cap_range_gdf[prop].isna() | (
                cap_range_gdf[prop] == "None"
            )
            missing_gdf = cap_range_gdf.loc[missing_mask]

            sampled_values = rng.choice(
                a=choices,
                size=len(missing_gdf),
                p=cap_range_prob_dict["probabilities"][prop],
            )

            buildings_gdf.loc[missing_gdf.index, prop] = sampled_values

    return buildings_gdf
1983
1984
1985
def add_commissioning_date(
    buildings_gdf: gpd.GeoDataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    seed: int,
):
    """
    Assign a uniformly random commissioning date to each new PV generator.

    Dates are drawn with daily resolution from the closed interval
    [start, end].

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    start : pandas.Timestamp
        Minimum Timestamp to use.
    end : pandas.Timestamp
        Maximum Timestamp to use.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with start-up date added.
    """
    # Pool of candidate dates, one entry per day in [start, end].
    date_pool = pd.date_range(start=start, end=end, freq="1D")

    rng = default_rng(seed=seed)
    picked_dates = rng.choice(date_pool, size=len(buildings_gdf))

    return buildings_gdf.assign(commissioning_date=picked_dates)
2017
2018
2019
@timer_func
def allocate_scenarios(
    mastr_gdf: gpd.GeoDataFrame,
    valid_buildings_gdf: gpd.GeoDataFrame,
    last_scenario_gdf: gpd.GeoDataFrame,
    scenario: str,
):
    """
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.

    Derives empirical distributions (capacity share, load factor,
    building area) from the MaStR data, retires end-of-life generators
    from the preceding scenario, desaggregates the remaining target
    capacity onto buildings and adds random metadata plus commissioning
    dates.

    Parameters
    ----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    valid_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    last_scenario_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings matched with pv generators
        from temporally preceding scenario.
    scenario : str
        Scenario to desaggregate and allocate.
    Returns
    -------
    tuple or None
        geopandas.GeoDataFrame
            GeoDataFrame containing OSM buildings matched with pv
            generators.
        pandas.DataFrame
            DataFrame containing pv rooftop capacity per grid id.
        Returns ``None`` if the scenario holds no PV rooftop capacity.
    """
    cap_per_bus_id_df = cap_per_bus_id(scenario)

    # Nothing to allocate in this scenario.
    if cap_per_bus_id_df.empty:
        print(f"No PV rooftop in scenario {scenario}")
        return

    logger.debug(
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
    )

    # Retire generators that exceed their lifetime before this scenario.
    last_scenario_gdf = determine_end_of_life_gens(
        last_scenario_gdf,
        SCENARIO_TIMESTAMP[scenario],
        PV_ROOFTOP_LIFETIME,
    )

    buildings_gdf = calculate_max_pv_cap_per_building(
        valid_buildings_gdf,
        last_scenario_gdf,
        PV_CAP_PER_SQ_M,
        ROOF_FACTOR,
    )

    mastr_gdf = calculate_building_load_factor(mastr_gdf, buildings_gdf)

    # Empirical distributions per capacity range, derived from MaStR.
    prob_dict = probabilities(mastr_gdf, cap_ranges=CAP_RANGES)
    share_dict = cap_share_per_cap_range(mastr_gdf, cap_ranges=CAP_RANGES)
    load_factor_dict = mean_load_factor_per_cap_range(
        mastr_gdf, cap_ranges=CAP_RANGES
    )
    area_range_dict = building_area_range_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
        min_building_size=MIN_BUILDING_SIZE,
        upper_quantile=UPPER_QUANTILE,
        lower_quantile=LOWER_QUANTILE,
    )

    allocated_gdf = desaggregate_pv(
        buildings_gdf=buildings_gdf,
        cap_df=cap_per_bus_id_df,
        prob_dict=prob_dict,
        cap_share_dict=share_dict,
        building_area_range_dict=area_range_dict,
        load_factor_dict=load_factor_dict,
        seed=SEED,
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
    ).assign(scenario=scenario)

    meta_gdf = frame_to_numeric(
        add_buildings_meta_data(allocated_gdf, prob_dict, SEED)
    )

    # Commission new units between the last scenario's newest unit and
    # the scenario's own timestamp.
    dated_gdf = add_commissioning_date(
        meta_gdf,
        start=last_scenario_gdf.commissioning_date.max(),
        end=SCENARIO_TIMESTAMP[scenario],
        seed=SEED,
    )

    return dated_gdf, cap_per_bus_id_df
2129
2130
2131
class EgonPowerPlantPvRoofBuilding(Base):
    """
    Class definition of table supply.egon_power_plants_pv_roof_building.

    One row per PV rooftop unit allocated to a building in a scenario.
    """

    __tablename__ = "egon_power_plants_pv_roof_building"
    __table_args__ = {"schema": "supply"}

    # surrogate primary key
    index = Column(Integer, primary_key=True, index=True)
    # scenario label the unit belongs to (e.g. status quo or target year)
    scenario = Column(String)
    # grid district bus; nullable — filled later by add_bus_ids_sq
    bus_id = Column(Integer, nullable=True)
    # id of the (OSM or synthetic) building the unit sits on
    building_id = Column(Integer)
    # MaStR generator id; presumably null for desaggregated units — verify
    gens_id = Column(String, nullable=True)
    # installed capacity (MW per the module-level cap constants — confirm)
    capacity = Column(Float)
    orientation_uniform = Column(Float)
    orientation_primary = Column(String)
    orientation_primary_angle = Column(String)
    # voltage level 1-7; inferred from capacity where missing
    voltage_level = Column(Integer)
    # ERA5 weather cell id joined via zensus cell
    weather_cell_id = Column(Integer)
2151
2152
2153
def add_metadata():
    """
    Create and submit OEP metadata for the PV rooftop table.

    Builds an oemetadata dictionary for
    supply.egon_power_plants_pv_roof_building, validates and renders it
    with the matching OMI dialect and stores it as a comment on the
    table.
    """
    schema = "supply"
    table = "egon_power_plants_pv_roof_building"
    name = f"{schema}.{table}"
    # Zenodo deposit ids of the source data dumps referenced below.
    deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"]
    deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][
        "zenodo"
    ]["deposit_id"]

    # Two contributor entries for the same person: one for the metadata,
    # one for the dataset workflow.
    contris = contributors(["kh", "kh"])

    contris[0]["date"] = "2023-03-16"

    contris[0]["object"] = "metadata"
    contris[1]["object"] = "dataset"

    contris[0]["comment"] = "Add metadata to dataset."
    contris[1]["comment"] = "Add workflow to generate dataset."

    # oemetadata dictionary; keys follow the oemetadata v1.4.x spec
    # linked in the "_comment" section below.
    meta = {
        "name": name,
        "title": "eGon power plants rooftop solar",
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": (
            "eGon power plants rooftop solar systems allocated to buildings"
        ),
        "language": "en-US",
        "keywords": ["photovoltaik", "solar", "pv", "mastr", "status quo"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": "none",
            "extent": "Germany",
            "resolution": "building",
        },
        "temporal": {
            "referenceDate": (
                config.datasets()["mastr_new"]["egon2021_date_max"].split(" ")[
                    0
                ]
            ),
            "timeseries": {},
        },
        "sources": [
            {
                "title": "Data bundle for egon-data",
                "description": (
                    "Data bundle for egon-data: A transparent and "
                    "reproducible data processing pipeline for energy "
                    "system modeling"
                ),
                "path": (
                    "https://zenodo.org/record/"
                    f"{deposit_id_data_bundle}#.Y_dWM4CZMVM"
                ),
                "licenses": [license_dedl(attribution="© Cußmann, Ilka")],
            },
            {
                "title": ("open-MaStR power unit registry for eGo^n project"),
                "description": (
                    "Data from Marktstammdatenregister (MaStR) data using "
                    "the data dump from 2022-11-17 for eGon-data."
                ),
                "path": (f"https://zenodo.org/record/{deposit_id_mastr}"),
                "licenses": [license_dedl(attribution="© Amme, Jonathan")],
            },
            sources()["openstreetmap"],
            sources()["era5"],
            sources()["vg250"],
            sources()["egon-data"],
        ],
        "licenses": [license_odbl("© eGon development team")],
        "contributors": contris,
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": name,
                "path": "None",
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    # Field list is generated from the live DB table so
                    # metadata and schema cannot drift apart.
                    "fields": generate_resource_fields_from_db_table(
                        schema,
                        table,
                    ),
                    "primaryKey": "index",
                },
                "dialect": {"delimiter": "", "decimalSeparator": ""},
            }
        ],
        "review": {"path": "", "badge": ""},
        "metaMetadata": meta_metadata(),
        "_comment": {
            "metadata": (
                "Metadata documentation and explanation (https://github."
                "com/OpenEnergyPlatform/oemetadata/blob/master/metadata/"
                "v141/metadata_key_description.md)"
            ),
            "dates": (
                "Dates and time must follow the ISO8601 including time "
                "zone (YYYY-MM-DD or YYYY-MM-DDThh:mm:ss±hh)"
            ),
            "units": "Use a space between numbers and units (100 m)",
            "languages": (
                "Languages must follow the IETF (BCP47) format (en-GB, "
                "en-US, de-DE)"
            ),
            "licenses": (
                "License name must follow the SPDX License List "
                "(https://spdx.org/licenses/)"
            ),
            "review": (
                "Following the OEP Data Review (https://github.com/"
                "OpenEnergyPlatform/data-preprocessing/wiki)"
            ),
            "none": "If not applicable use (none)",
        },
    }

    # Pick the OMI dialect matching the metadata version, e.g. "oep-v1.4".
    dialect = get_dialect(f"oep-v{meta_metadata()['metadataVersion'][4:7]}")()

    # Round-trip through the dialect to validate and normalize the dict.
    meta = dialect.compile_and_render(dialect.parse(json.dumps(meta)))

    db.submit_comment(
        f"'{json.dumps(meta)}'",
        schema,
        table,
    )
2281
2282
2283
def create_scenario_table(buildings_gdf):
    """
    Create mapping table pv_unit <-> building for scenario.

    Drops and recreates the target table, writes the export columns of
    *buildings_gdf* into it and attaches the table metadata.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame with desaggregated PV plants per building.
    """
    # Recreate the table from scratch so reruns do not duplicate rows.
    EgonPowerPlantPvRoofBuilding.__table__.drop(bind=engine, checkfirst=True)
    EgonPowerPlantPvRoofBuilding.__table__.create(bind=engine, checkfirst=True)

    buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql(
        name=EgonPowerPlantPvRoofBuilding.__table__.name,
        schema=EgonPowerPlantPvRoofBuilding.__table__.schema,
        # Reuse the module-level engine instead of creating a second one
        # via db.engine(); keeps connection handling consistent with the
        # drop/create calls above.
        con=engine,
        if_exists="append",
        index=False,
    )

    add_metadata()
2297
2298
2299
def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Join the weather cell id onto every building.

    Buildings are first mapped to their zensus population cell, which in
    turn is mapped to its weather cell. Raises a ``ValueError`` if any
    building ends up without a weather cell id.
    """
    building_zensus_sql = """
    SELECT building_id, zensus_population_id
    FROM boundaries.egon_map_zensus_mvgd_buildings
    """

    # building -> zensus population cell
    buildings_gdf = buildings_gdf.merge(
        right=db.select_dataframe(building_zensus_sql).drop_duplicates(
            subset="building_id"
        ),
        how="left",
        on="building_id",
    )

    zensus_weather_sql = """
    SELECT zensus_population_id, w_id as weather_cell_id
    FROM boundaries.egon_map_zensus_weather_cell
    """

    # zensus population cell -> weather cell
    buildings_gdf = buildings_gdf.merge(
        right=db.select_dataframe(zensus_weather_sql).drop_duplicates(
            subset="zensus_population_id"
        ),
        how="left",
        on="zensus_population_id",
    )

    missing = buildings_gdf.loc[
        buildings_gdf.weather_cell_id.isna(), "building_id"
    ].tolist()

    if missing:
        raise ValueError(
            f"Following buildings don't have a weather cell id: {missing}"
        )

    return buildings_gdf
2334
2335
2336
def add_bus_ids_sq(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add bus ids for status_quo units.

    Spatially joins the status-quo generators with the MV grid districts
    and writes the matched district index into ``bus_id``.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with bus_id per
        generator.
    """
    districts_gdf = grid_districts(EPSG)

    # NOTE(review): scenarios are labelled e.g. "status2023" in
    # pv_rooftop_to_buildings — confirm the "status_quo" literal still
    # matches any rows here.
    is_status_quo = buildings_gdf.scenario == "status_quo"

    joined = buildings_gdf.loc[is_status_quo].sjoin(districts_gdf, how="left")
    buildings_gdf.loc[is_status_quo, "bus_id"] = joined.index_right

    return buildings_gdf
2363
2364
2365 View Code Duplication
def infer_voltage_level(
    units_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Infer nan values in voltage level derived from generator capacity to
    the power plants.

    Units without a voltage level from MaStR get one assigned from their
    capacity; inferred rows are flagged in ``voltage_level_inferred``.

    Parameters
    -----------
    units_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing units with voltage levels from MaStR.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing units all having assigned a voltage level.
    """

    # Capacity thresholds and the voltage level assigned at or below each
    # threshold; capacities above the last threshold fall through to 1.
    thresholds = ((0.1, 7), (0.2, 6), (5.5, 5), (20, 4), (120, 3))

    def _level_from_capacity(p: float) -> int:
        for limit, level in thresholds:
            if p <= limit:
                return level
        return 1

    units_gdf["voltage_level_inferred"] = False
    missing = units_gdf.voltage_level.isna()
    units_gdf.loc[missing, "voltage_level_inferred"] = True
    units_gdf.loc[missing, "voltage_level"] = units_gdf.loc[
        missing
    ].capacity.apply(_level_from_capacity)

    return units_gdf
2403
2404
2405
def pv_rooftop_to_buildings():
    """
    Main script, executed as task.

    Allocates MaStR PV rooftop units to buildings for the status-quo
    scenario, desaggregates the capacity targets of the remaining
    scenarios onto buildings, enriches the result with weather cell and
    bus ids and exports it to the database.
    """

    mastr_gdf = load_mastr_data()

    status_quo = "status2023"  # FIXME: Hard coded

    ts = pd.Timestamp(
        config.datasets()["mastr_new"][f"{status_quo}_date_max"], tz="UTC"
    )

    # Drop units commissioned after the status-quo cut-off date.
    mastr_gdf = mastr_gdf.loc[mastr_gdf.commissioning_date <= ts]

    buildings_gdf = load_building_data()

    desagg_mastr_gdf, desagg_buildings_gdf = allocate_to_buildings(
        mastr_gdf, buildings_gdf
    )

    all_buildings_gdf = (
        desagg_mastr_gdf.assign(scenario=status_quo)
        .reset_index()
        .rename(columns={"geometry": "geom"})
        .set_geometry("geom")
    )

    scenario_buildings_gdf_sq = all_buildings_gdf.copy()

    cap_per_bus_id_df = pd.DataFrame()

    for scenario in SCENARIOS:
        if scenario == status_quo:
            scenario_buildings_gdf = scenario_buildings_gdf_sq.copy()
        elif "status" in scenario:
            ts = pd.Timestamp(
                config.datasets()["mastr_new"][f"{scenario}_date_max"],
                tz="UTC",
            )

            # Bugfix: build the mask from the status-quo frame itself.
            # It previously referenced ``scenario_buildings_gdf``, i.e.
            # the prior iteration's result — undefined on the first
            # iteration and index-misaligned otherwise.
            scenario_buildings_gdf = scenario_buildings_gdf_sq.loc[
                scenario_buildings_gdf_sq.commissioning_date <= ts
            ].copy()

        else:
            logger.debug(f"Desaggregating scenario {scenario}.")

            result = allocate_scenarios(  # noqa: F841
                desagg_mastr_gdf,
                desagg_buildings_gdf,
                scenario_buildings_gdf_sq,
                scenario,
            )

            # allocate_scenarios returns None when the scenario has no
            # PV rooftop capacity; skip it instead of crashing on the
            # tuple unpack.
            if result is None:
                continue

            scenario_buildings_gdf, cap_per_bus_id_scenario_df = result

            cap_per_bus_id_df = pd.concat(
                [cap_per_bus_id_df, cap_per_bus_id_scenario_df]
            )

        all_buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [all_buildings_gdf, scenario_buildings_gdf], ignore_index=True
            ),
            crs=scenario_buildings_gdf.crs,
            geometry="geom",
        )

    # add weather cell
    all_buildings_gdf = add_weather_cell_id(all_buildings_gdf)

    # add bus IDs for status quo scenario
    all_buildings_gdf = add_bus_ids_sq(all_buildings_gdf)

    # export scenario
    create_scenario_table(infer_voltage_level(all_buildings_gdf))
2481