Passed
Pull Request — dev (#1170)
by
unknown
05:05
created

drop_buildings_outside_grids()   A

Complexity

Conditions 1

Size

Total Lines 25
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 25
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""
2
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate
3
new PV rooftop generators for scenarios eGon2035 and eGon100RE.
4
5
See documentation section :ref:`pv-rooftop-ref` for more information.
6
7
"""
8
from __future__ import annotations
9
10
from collections import Counter
11
from functools import wraps
12
from time import perf_counter
13
import datetime
14
import json
15
16
from geoalchemy2 import Geometry
17
from loguru import logger
18
from numpy.random import RandomState, default_rng
19
from omi.dialects import get_dialect
20
from pyproj.crs.crs import CRS
21
from sqlalchemy import BigInteger, Column, Float, Integer, String
22
from sqlalchemy.dialects.postgresql import HSTORE
23
from sqlalchemy.ext.declarative import declarative_base
24
import geopandas as gpd
25
import numpy as np
26
import pandas as pd
27
28
from egon.data import config, db
29
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
30
    OsmBuildingsSynthetic,
31
)
32
from egon.data.datasets.power_plants.mastr_db_classes import EgonPowerPlantsPv
33
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
34
from egon.data.datasets.scenario_parameters import get_scenario_year
35
from egon.data.datasets.zensus_vg250 import Vg250Gem
36
from egon.data.metadata import (
    context,
    contributors,
    generate_resource_fields_from_db_table,
    license_dedl,
    license_odbl,
    meta_metadata,
    sources,
)
46
47
engine = db.engine()
Base = declarative_base()
# project-wide random seed from the config; keeps all random operations
# (sampling, allocation) reproducible between runs
SEED = int(config.settings()["egon-data"]["--random-seed"])

# TODO: move to yml
MASTR_INDEX_COL = "gens_id"

EPSG = 4326  # EPSG code of WGS84 lat/lon; used e.g. when loading grid districts
SRID = 3035  # SRID of the OSM building geometries in the database

# data cleaning
# plausible PV rooftop capacity bounds in MW (see clean_mastr_data)
MAX_REALISTIC_PV_CAP = 23500 / 10**3
MIN_REALISTIC_PV_CAP = 0.1 / 10**3

# show additional logging information
VERBOSE = False

# Number of quantiles used to bucket generators/buildings by size
Q = 5

# Scenario Data
SCENARIOS = config.settings()["egon-data"]["--scenarios"]
# reference timestamp per scenario, used with commissioning dates
SCENARIO_TIMESTAMP = {
    "status2019": pd.Timestamp("2020-01-01", tz="UTC"),
    "status2023": pd.Timestamp("2024-01-01", tz="UTC"),
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
}
# assumed technical lifetime of a PV rooftop system (20 years)
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")

# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
MODUL_CAP = 0.4 / 10**3  # MWp
MODUL_SIZE = 1.096 * 1.754  # m²
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE  # MWp per m² of module area

# Estimation of usable roof area
# Factor for the conversion of building area to roof area
# estimation mean roof pitch: 35°
# estimation usable roof share: 80%
# estimation that only the south side of the building is used for pv
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
# AREA_FACTOR = 1.221
# USABLE_ROOF_SHARE = 0.8
# SOUTH_SHARE = 0.5
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
ROOF_FACTOR = 0.5

# capacity classes in MW: small (<30 kW), medium (30-100 kW), large (>100 kW)
CAP_RANGES = [
    (0, 30 / 10**3),
    (30 / 10**3, 100 / 10**3),
    (100 / 10**3, float("inf")),
]

MIN_BUILDING_SIZE = 10.0  # minimum building area (presumably m² — confirm)
UPPER_QUANTILE = 0.95
LOWER_QUANTILE = 0.05

# columns written to the result table
COLS_TO_EXPORT = [
    "scenario",
    "bus_id",
    "building_id",
    "gens_id",
    "capacity",
    "orientation_uniform",
    "orientation_primary",
    "orientation_primary_angle",
    "voltage_level",
    "weather_cell_id",
]

# TODO
INCLUDE_SYNTHETIC_BUILDINGS = True
ONLY_BUILDINGS_WITH_DEMAND = True
TEST_RUN = False  # if True, only a subset of AGS is processed (see allocate_pv)
122
123
124
def timer_func(func):
    """Decorator that logs the wall-clock runtime of each call to ``func``."""

    @wraps(func)
    def _timed(*args, **kwargs):
        tic = perf_counter()
        result = func(*args, **kwargs)
        total_time = perf_counter() - tic
        logger.debug(
            f"Function {func.__name__} took {total_time:.4f} seconds."
        )
        return result

    return _timed
137
138
139
@timer_func
def mastr_data(
    index_col: str | int | list[str] | list[int],
) -> gpd.GeoDataFrame:
    """
    Read MaStR PV rooftop data from the eGo^n database.

    Only units that are in operation ("InBetrieb") and mounted on
    buildings (rooftop/facade site type) are selected.

    Parameters
    -----------
    index_col : str, int or list of str or int
        Column(s) to use as the row labels of the DataFrame.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data.
    """
    with db.session_scope() as session:
        query = session.query(EgonPowerPlantsPv).filter(
            EgonPowerPlantsPv.status == "InBetrieb",
            EgonPowerPlantsPv.site_type
            == ("Bauliche Anlagen (Hausdach, Gebäude und Fassade)"),
        )

    # NOTE(review): the query is executed after the session scope has
    # closed; this relies on the session bind staying usable — confirm.
    gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col=index_col
    ).drop(columns="id")

    logger.debug("MaStR data loaded.")

    return gdf
169
170
171
@timer_func
172
def clean_mastr_data(
    mastr_gdf: gpd.GeoDataFrame,
    max_realistic_pv_cap: int | float,
    min_realistic_pv_cap: int | float,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Clean the MaStR data from implausible data.

    * Drop MaStR ID duplicates.
    * Drop generators with implausible capacities.
    * Fill missing commissioning dates by sampling from the known ones,
      preserving their empirical distribution.

    Parameters
    -----------
    mastr_gdf : pandas.DataFrame
        DataFrame containing MaStR data.
    max_realistic_pv_cap : int or float
        Maximum capacity, which is considered to be realistic.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    """
    init_len = len(mastr_gdf)

    # drop duplicates (keep the first occurrence of each MaStR ID)
    mastr_gdf = mastr_gdf.loc[~mastr_gdf.index.duplicated()]

    # drop generators without any capacity info
    # and capacity of zero
    # and if the capacity is > 23.5 MW, because
    # Germanies largest rooftop PV is 23 MW
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
    mastr_gdf = mastr_gdf.loc[
        ~mastr_gdf.capacity.isna()
        & (mastr_gdf.capacity <= max_realistic_pv_cap)
        & (mastr_gdf.capacity > min_realistic_pv_cap)
    ]

    # get consistent start-up date: randomly fill missing start-up dates
    # by sampling from the known dates
    pool = mastr_gdf.loc[
        ~mastr_gdf.commissioning_date.isna()
    ].commissioning_date.to_numpy()

    size = len(mastr_gdf) - len(pool)

    if size > 0:
        rng = default_rng(seed=seed)

        # sample with replacement when there are more gaps than known
        # dates — replace=False would raise a ValueError in that case
        choice = rng.choice(
            pool,
            size=size,
            replace=size > len(pool),
        )

        # assign only the commissioning_date column instead of whole
        # rows so no other column can be clobbered accidentally
        mastr_gdf.loc[
            mastr_gdf.commissioning_date.isna(), "commissioning_date"
        ] = choice

        logger.info(
            f"Randomly and weighted added start-up date to {size} generators."
        )

    # normalize to timezone-aware (UTC) datetimes
    mastr_gdf = mastr_gdf.assign(
        commissioning_date=pd.to_datetime(
            mastr_gdf.commissioning_date, utc=True
        )
    )

    end_len = len(mastr_gdf)
    # guard against an empty input frame to avoid a ZeroDivisionError
    if init_len:
        logger.debug(
            f"Dropped {init_len - end_len} "
            f"({((init_len - end_len) / init_len) * 100:g}%)"
            f" of {init_len} rows from MaStR DataFrame."
        )

    return mastr_gdf
254
255
256
@timer_func
def municipality_data() -> gpd.GeoDataFrame:
    """
    Get municipality data from eGo^n Database.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with municipality data, indexed by the AGS
        (official municipality key).
    """
    with db.session_scope() as session:
        query = session.query(Vg250Gem.ags, Vg250Gem.geometry.label("geom"))

    # NOTE(review): executed after the session scope has closed; relies
    # on the session bind staying usable — confirm.
    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="ags"
    )
271
272
273
@timer_func
274
def add_ags_to_gens(
    mastr_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to generators.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with AGS ID added.
    """
    joined_gdf = mastr_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # the spatial join stores the matched municipality index in
    # "index_right"; expose it under the descriptive name "ags"
    return joined_gdf.rename(columns={"index_right": "ags"})
297
298
299
def drop_gens_outside_muns(
    mastr_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all generators outside of municipalities.

    Generators without an AGS ID (no municipality matched in the
    spatial join) are removed.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with generators without an AGS ID dropped.
    """
    gdf = mastr_gdf.loc[~mastr_gdf.ags.isna()]

    # guard against an empty input frame to avoid a ZeroDivisionError
    # in the percentage calculation of the log message
    if len(mastr_gdf) > 0:
        logger.debug(
            f"{len(mastr_gdf) - len(gdf)} ("
            f"{(len(mastr_gdf) - len(gdf)) / len(mastr_gdf) * 100:g}%)"
            f" of {len(mastr_gdf)} values are outside of the municipalities"
            " and are therefore dropped."
        )

    return gdf
324
325
326
def load_mastr_data():
    """Read and clean PV rooftop data from the MaStR tables in the DB.

    Loads the raw MaStR units, cleans them, and spatially assigns each
    generator to its municipality (AGS).

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    """
    # raw MaStR units, indexed by the configured generator ID column
    mastr_gdf = mastr_data(
        MASTR_INDEX_COL,
    )

    # drop duplicates and implausible capacities, fill missing
    # commissioning dates
    clean_mastr_gdf = clean_mastr_data(
        mastr_gdf,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
    )

    municipalities_gdf = municipality_data()

    # spatially assign each generator to its municipality (AGS)
    clean_mastr_gdf = add_ags_to_gens(clean_mastr_gdf, municipalities_gdf)

    return drop_gens_outside_muns(clean_mastr_gdf)
351
352
353
class OsmBuildingsFiltered(Base):
    """
    Class definition of table openstreetmap.osm_buildings_filtered.

    Filtered OSM buildings with footprint geometry, point geometry
    and area.
    """
    __tablename__ = "osm_buildings_filtered"
    __table_args__ = {"schema": "openstreetmap"}

    osm_id = Column(BigInteger)  # original OSM object ID
    amenity = Column(String)  # OSM amenity tag
    building = Column(String)  # OSM building tag
    name = Column(String)  # OSM name tag
    geom = Column(Geometry(srid=SRID), index=True)  # building footprint
    area = Column(Float)  # building area (presumably m² — confirm)
    geom_point = Column(Geometry(srid=SRID), index=True)  # point geometry
    tags = Column(HSTORE)  # raw OSM tags as key/value store
    id = Column(BigInteger, primary_key=True, index=True)
370
371
372
@timer_func
def osm_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read OSM buildings data from eGo^n Database.

    Only the building ID, its area and its point geometry are read.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    """
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsFiltered.id,
            OsmBuildingsFiltered.area,
            OsmBuildingsFiltered.geom_point.label("geom"),
        )

    # NOTE(review): executed after the session scope has closed; relies
    # on the session bind staying usable — confirm.
    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
397
398
399
@timer_func
def synthetic_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read synthetic buildings data from eGo^n Database.

    Only the building ID, its area and its point geometry are read.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing synthetic buildings data.
    """
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsSynthetic.id,
            OsmBuildingsSynthetic.area,
            OsmBuildingsSynthetic.geom_point.label("geom"),
        )

    # NOTE(review): executed after the session scope has closed; relies
    # on the session bind staying usable — confirm.
    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
424
425
426
@timer_func
427
def add_ags_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with AGS ID added.
    """
    joined_gdf = buildings_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # the spatial join stores the matched municipality index in
    # "index_right"; expose it under the descriptive name "ags"
    return joined_gdf.rename(columns={"index_right": "ags"})
450
451
452
def drop_buildings_outside_muns(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of municipalities.

    Buildings without an AGS ID (no municipality matched in the
    spatial join) are removed.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without an AGS ID dropped.
    """
    gdf = buildings_gdf.loc[~buildings_gdf.ags.isna()]

    # guard against an empty input frame to avoid a ZeroDivisionError
    # in the percentage calculation of the log message
    if len(buildings_gdf) > 0:
        logger.debug(
            f"{len(buildings_gdf) - len(gdf)} "
            f"({(len(buildings_gdf) - len(gdf)) / len(buildings_gdf) * 100:g}%) "
            f"of {len(buildings_gdf)} values are outside of the municipalities "
            "and are therefore dropped."
        )

    return gdf
477
478
479
def egon_building_peak_loads():
    """
    Get IDs of all buildings that have an electricity peak load.

    Returns
    -------
    numpy.ndarray
        Sorted array of unique building IDs.
    """

    # use active scenario which is closest to today
    scenario = sorted(SCENARIOS, key=get_scenario_year)[0]

    # scenario comes from the project configuration, not user input,
    # so inlining it into the SQL string is acceptable here
    sql = f"""
    SELECT building_id
    FROM demand.egon_building_electricity_peak_loads
    WHERE scenario = '{scenario}'
    """

    return (
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
    )
493
494
495
@timer_func
def load_building_data():
    """
    Read buildings from DB
    Tables:

    * `openstreetmap.osm_buildings_filtered` (from OSM)
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)

    Use column `id` for both as it is unique hence you concat both datasets.
    If INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be
    loaded.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    """

    municipalities_gdf = municipality_data()

    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)

    if INCLUDE_SYNTHETIC_BUILDINGS:
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)

        # concat OSM and synthetic buildings; IDs are unique across both
        buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [
                    osm_buildings_gdf,
                    synthetic_buildings_gdf,
                ]
            ),
            geometry="geom",
            crs=osm_buildings_gdf.crs,
        ).rename(columns={"area": "building_area"})

        buildings_gdf.index = buildings_gdf.index.astype(int)

    else:
        buildings_gdf = osm_buildings_gdf.rename(
            columns={"area": "building_area"}
        )

    if ONLY_BUILDINGS_WITH_DEMAND:
        # restrict to buildings that have an electricity peak load
        building_ids = egon_building_peak_loads()

        init_len = len(building_ids)

        building_ids = np.intersect1d(
            list(map(int, building_ids)),
            list(map(int, buildings_gdf.index.to_numpy())),
        )

        end_len = len(building_ids)

        logger.debug(
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) "
            f"of buildings have peak load."
        )

        buildings_gdf = buildings_gdf.loc[building_ids]

    # assign each building to its municipality and drop unmatched ones
    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)

    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)

    # intersect mv grid districts with federal states and assign the
    # resulting overlay IDs to the buildings
    grid_districts_gdf = grid_districts(EPSG)

    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)

    grid_federal_state_gdf = overlay_grid_districts_with_counties(
        grid_districts_gdf,
        federal_state_gdf,
    )

    buildings_overlay_gdf = add_overlay_id_to_buildings(
        buildings_ags_gdf,
        grid_federal_state_gdf,
    )

    logger.debug("Loaded buildings.")

    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)

    # overwrite bus_id with data from new table
    sql = (
        "SELECT building_id, bus_id FROM "
        "boundaries.egon_map_zensus_mvgd_buildings"
    )
    map_building_bus_df = db.select_dataframe(sql)

    # keep only buildings present in the building/bus mapping table
    building_ids = np.intersect1d(
        list(map(int, map_building_bus_df.building_id.unique())),
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
    )

    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]

    # replace the overlay-derived bus_id by the mapped one
    gdf = (
        buildings_within_gdf.reset_index()
        .drop(columns=["bus_id"])
        .merge(
            how="left",
            right=map_building_bus_df,
            left_on="id",
            right_on="building_id",
        )
        .drop(columns=["building_id"])
        .set_index("id")
        .sort_index()
    )

    # a building may map to several buses; keep the first match only
    return gdf[~gdf.index.duplicated(keep="first")]
610
611
612
@timer_func
613
def sort_and_qcut_df(
    df: pd.DataFrame | gpd.GeoDataFrame,
    col: str,
    q: int,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Determine the quantile of a given attribute in a (Geo)DataFrame.

    Sort the (Geo)DataFrame in ascending order for the given attribute.

    Parameters
    -----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        (Geo)DataFrame to sort and qcut.
    col : str
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
    q : int
        Number of quantiles.

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Sorted and qcut (Geo)DataFrame.
    """
    ordered = df.sort_values(col, ascending=True)

    # bucket the attribute into q equally sized quantile groups,
    # labelled 0 .. q - 1
    quantiles = pd.qcut(
        ordered[col],
        q=q,
        labels=range(q),
    )

    return ordered.assign(quant=quantiles)
643
644
645
@timer_func
def allocate_pv(
    q_mastr_gdf: gpd.GeoDataFrame,
    q_buildings_gdf: gpd.GeoDataFrame,
    seed: int,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate the MaStR pv generators to the OSM buildings.
    This will determine a building for each pv generator if there are more
    buildings than generators within a given AGS. Primarily generators are
    distributed with the same quantile as the buildings. Multiple assignment
    is excluded.

    Parameters
    -----------
    q_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded and qcut MaStR data.
    q_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing qcut OSM buildings data.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    rng = default_rng(seed=seed)

    # initialize the cross-reference columns; NaN marks "not allocated"
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan).sort_values(
        by=["ags", "quant"]
    )
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan).sort_values(
        by=["ags", "quant"]
    )

    ags_list = q_buildings_gdf.ags.unique()

    if TEST_RUN:
        # development shortcut: only process a subset of municipalities
        ags_list = ags_list[:250]

    num_ags = len(ags_list)

    t0 = perf_counter()

    # allocate municipality by municipality
    for count, ags in enumerate(ags_list):
        buildings = q_buildings_gdf.loc[q_buildings_gdf.ags == ags]
        gens = q_mastr_gdf.loc[q_mastr_gdf.ags == ags]

        len_build = len(buildings)
        len_gens = len(gens)

        if len_build < len_gens:
            # more generators than buildings: randomly truncate the
            # generator set so each building gets at most one generator
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
            logger.error(
                f"There are {len_gens} generators and only {len_build}"
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
                "generators were truncated to match the amount of buildings."
            )

            assert len_build == len(gens)

        # match generators and buildings within the same size quantile
        for quant in gens.quant.unique():
            q_buildings = buildings.loc[buildings.quant == quant]
            q_gens = gens.loc[gens.quant == quant]

            len_build = len(q_buildings)
            len_gens = len(q_gens)

            if len_build < len_gens:
                delta = len_gens - len_build

                logger.warning(
                    f"There are {len_gens} generators and only {len_build} "
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
                    f"buildings from AGS {ags} will be added randomly."
                )

                # top up with random buildings from other quantiles of
                # the same municipality
                add_buildings = pd.Index(
                    rng.choice(
                        list(set(buildings.index) - set(q_buildings.index)),
                        size=delta,
                        replace=False,
                    )
                )

                # NOTE(review): Index.union sorts, so which generator
                # ends up on which building is arbitrary — presumably
                # acceptable since both sets share the quantile; confirm.
                chosen_buildings = q_buildings.index.union(add_buildings)

            else:
                chosen_buildings = rng.choice(
                    q_buildings.index,
                    size=len_gens,
                    replace=False,
                )

            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index
            # remove used buildings so they cannot be assigned twice
            buildings = buildings.drop(chosen_buildings)

        if count % 500 == 0:
            logger.debug(
                f"Allocation of {count / num_ags * 100:g} % of AGS done. "
                f"It took {perf_counter() - t0:g} seconds."
            )

            t0 = perf_counter()

    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]

    # every allocated generator must appear exactly once
    assert len(assigned_buildings) == len(assigned_buildings.gens_id.unique())

    # write the reverse mapping (generator -> building)
    q_mastr_gdf.loc[assigned_buildings.gens_id, "building_id"] = (
        assigned_buildings.index
    )

    assigned_gens = q_mastr_gdf.loc[~q_mastr_gdf.building_id.isna()]

    assert len(assigned_buildings) == len(assigned_gens)

    logger.debug("Allocated status quo generators to buildings.")

    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
765
766
767
def frame_to_numeric(
    df: pd.DataFrame | gpd.GeoDataFrame,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Try to convert all columns of a DataFrame to numeric ignoring errors.

    Object-dtype columns (and the index) are converted to a numeric
    dtype where possible; values that cannot be parsed leave the
    column unchanged.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
    """

    def _to_numeric_or_keep(values):
        # pd.to_numeric(..., errors="ignore") is deprecated since
        # pandas 2.2 (removed in 3.0); emulate it explicitly
        try:
            return pd.to_numeric(values)
        except (ValueError, TypeError):
            return values

    if str(df.index.dtype) == "object":
        df.index = _to_numeric_or_keep(df.index)

    for col in df.columns:
        if str(df[col].dtype) == "object":
            df[col] = _to_numeric_or_keep(df[col])

    return df
787
788
789
def validate_output(
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
) -> None:
    """
    Validate output.

    * Validate that there are exactly as many buildings with a pv system as
      there are pv systems with a building
    * Validate that the building IDs with a pv system are the same building
      IDs as assigned to the pv systems
    * Validate that the pv system IDs with a building are the same pv system
      IDs as assigned to the buildings

    Parameters
    -----------
    desagg_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    desagg_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    allocated_gens = desagg_mastr_gdf.loc[
        ~desagg_mastr_gdf.building_id.isna()
    ]
    allocated_buildings = desagg_buildings_gdf.loc[
        ~desagg_buildings_gdf.gens_id.isna()
    ]

    # same number of allocated generators and buildings
    assert len(allocated_gens) == len(allocated_buildings)

    # both frames agree on the set of allocated building IDs
    assert (
        np.sort(allocated_gens.building_id.unique())
        == np.sort(allocated_buildings.index.unique())
    ).all()

    # both frames agree on the set of allocated generator IDs
    assert (
        np.sort(allocated_gens.index.unique())
        == np.sort(allocated_buildings.gens_id.unique())
    ).all()

    logger.debug("Validated output.")
839
840
841
def drop_unallocated_gens(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop generators which did not get allocated.

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with generators dropped which did
        not get allocated.
    """
    init_len = len(gdf)
    gdf = gdf.loc[~gdf.building_id.isna()]
    end_len = len(gdf)

    # guard against an empty input frame to avoid a ZeroDivisionError
    # in the percentage calculation of the log message
    if init_len > 0:
        logger.debug(
            f"Dropped {init_len - end_len} "
            f"({((init_len - end_len) / init_len) * 100:g}%)"
            f" of {init_len} unallocated rows from MaStR DataFrame."
        )

    return gdf
868
869
870
@timer_func
def allocate_to_buildings(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate status quo pv rooftop generators to buildings.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.

    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    logger.debug("Starting allocation of status quo.")

    # bucket generators by capacity and buildings by area into Q
    # quantiles so generators land on similarly sized buildings
    q_mastr_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
    q_buildings_gdf = sort_and_qcut_df(buildings_gdf, col="building_area", q=Q)

    desagg_mastr_gdf, desagg_buildings_gdf = allocate_pv(
        q_mastr_gdf, q_buildings_gdf, SEED
    )

    # sanity-check the allocation before dropping leftovers
    validate_output(desagg_mastr_gdf, desagg_buildings_gdf)

    return drop_unallocated_gens(desagg_mastr_gdf), desagg_buildings_gdf
902
903
904
@timer_func
def grid_districts(
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Load mv grid district geo data from eGo^n Database as
    geopandas.GeoDataFrame.

    Parameters
    -----------
    epsg : int
        EPSG ID to use as CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    """
    gdf = db.select_geodataframe(
        """
        SELECT bus_id, geom
        FROM grid.egon_mv_grid_district
        ORDER BY bus_id
        """,
        index_col="bus_id",
        geom_col="geom",
        epsg=epsg,
    )

    # ensure an integer index for integer-based lookups downstream
    gdf.index = gdf.index.astype(int)

    logger.debug("Grid districts loaded.")

    return gdf
936
937
938
def scenario_data(
    carrier: str = "solar_rooftop",
    scenario: str = "eGon2035",
) -> pd.DataFrame:
    """
    Get scenario capacity data from eGo^n Database.

    Parameters
    -----------
    carrier : str
        Carrier type to filter table by.
    scenario : str
        Scenario to filter table by.

    Returns
    -------
    pandas.DataFrame
        DataFrame with scenario capacity data in GW.
    """
    with db.session_scope() as session:
        query = session.query(EgonScenarioCapacities).filter(
            EgonScenarioCapacities.carrier == carrier,
            EgonScenarioCapacities.scenario_name == scenario,
        )

    # NOTE(review): executed after the session scope has closed; relies
    # on the session bind staying usable — confirm.
    df = pd.read_sql(
        query.statement, query.session.bind, index_col="index"
    ).sort_index()

    logger.debug("Scenario capacity data loaded.")

    return df
968
969
970 View Code Duplication
class Vg250Lan(Base):
    """
    Class definition of table boundaries.vg250_lan.

    Federal state boundaries from the VG250 administrative boundary
    dataset; used by :func:`federal_state_data`.
    """
    __tablename__ = "vg250_lan"
    __table_args__ = {"schema": "boundaries"}

    id = Column(BigInteger, primary_key=True, index=True)
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    nuts = Column(String)  # NUTS region code
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    geometry = Column(Geometry(srid=EPSG), index=True)  # state boundary
1006
1007
1008
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
    """
    Get federal state data from the eGo^n database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with federal state data, indexed by state ID.
    """
    with db.session_scope() as session:
        # Only ID, NUTS code and geometry are needed downstream.
        columns = [
            Vg250Lan.id,
            Vg250Lan.nuts,
            Vg250Lan.geometry.label("geom"),
        ]
        query = session.query(*columns)

        raw_gdf = gpd.read_postgis(
            query.statement, session.connection(), index_col="id"
        )
        gdf = raw_gdf.to_crs(to_crs)

    logger.debug("Federal State data loaded.")

    return gdf
1032
1033
1034
@timer_func
def overlay_grid_districts_with_counties(
    mv_grid_district_gdf: gpd.GeoDataFrame,
    federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Calculate the intersections of mv grid districts and counties.

    Parameters
    -----------
    mv_grid_district_gdf : gpd.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    federal_state_gdf : gpd.GeoDataFrame
        GeoDataFrame with federal state data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with one row per grid-district/county intersection.
    """
    logger.debug(
        "Calculating intersection overlay between mv grid districts and "
        "counties. This may take a while..."
    )

    # Reproject the states to the grid districts' CRS so the overlay
    # operates in a single coordinate system.
    states_gdf = federal_state_gdf.to_crs(mv_grid_district_gdf.crs)
    districts_gdf = mv_grid_district_gdf.reset_index()

    gdf = gpd.overlay(
        states_gdf,
        districts_gdf,
        how="intersection",
        keep_geom_type=True,
    )

    logger.debug("Done!")

    return gdf
1067
1068
1069
@timer_func
def add_overlay_id_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    grid_federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about overlay ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    grid_federal_state_gdf : geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between counties and grid
        districts.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with overlay ID added.
    """
    # Match CRS before the spatial join.
    reprojected_gdf = buildings_gdf.to_crs(grid_federal_state_gdf.crs)

    joined_gdf = reprojected_gdf.sjoin(
        grid_federal_state_gdf,
        how="left",
        predicate="intersects",
    )

    # The sjoin index column identifies the intersection polygon.
    gdf = joined_gdf.rename(columns={"index_right": "overlay_id"})

    logger.debug("Added overlay ID to OSM buildings.")

    return gdf
1101
1102
1103
def drop_buildings_outside_grids(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of grid areas.

    Buildings without a bus ID were not matched to any MV grid district
    and therefore cannot have PV capacity allocated to them.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without a bus ID dropped.
    """
    gdf = buildings_gdf.loc[~buildings_gdf.bus_id.isna()]

    total = len(buildings_gdf)
    dropped = total - len(gdf)
    # Guard against ZeroDivisionError when an empty frame is passed in.
    share = dropped / total * 100 if total > 0 else 0.0

    logger.debug(
        f"{dropped} "
        f"({share:g}%) "
        f"of {total} values are outside of the grid areas "
        "and are therefore dropped."
    )

    return gdf
1128
1129
1130
def cap_per_bus_id(
    scenario: str,
) -> pd.DataFrame:
    """
    Get table with total pv rooftop capacity per grid district.

    Parameters
    -----------
    scenario : str
        Scenario name. Interpolated into the query; expected to be an
        internal scenario identifier, not untrusted user input.
    Returns
    -------
    pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid, indexed by
        bus_id.
    """
    # Use a distinct local name: `sources` is already imported at module
    # level from egon.data.metadata and must not be shadowed.
    sources_cfg = config.datasets()["solar_rooftop"]["sources"]

    sql = f"""
    SELECT bus_id, SUM(el_capacity) as capacity
    FROM {sources_cfg['power_plants']['schema']}.{sources_cfg['power_plants']['table']}
    WHERE carrier = 'solar_rooftop'
    AND scenario = '{scenario}'
    GROUP BY bus_id
    """

    df = db.select_dataframe(sql, index_col="bus_id")

    return df
1158
1159
1160
def determine_end_of_life_gens(
    mastr_gdf: gpd.GeoDataFrame,
    scenario_timestamp: pd.Timestamp,
    pv_rooftop_lifetime: pd.Timedelta,
) -> gpd.GeoDataFrame:
    """
    Determine if an old PV system has reached its end of life.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    scenario_timestamp : pandas.Timestamp
        Timestamp at which the scenario takes place.
    pv_rooftop_lifetime : pandas.Timedelta
        Average expected lifetime of PV rooftop systems.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data with an added ``age``
        column and a boolean ``end_of_life`` flag.
    """
    total_cap = mastr_gdf.capacity.sum()

    # A system is considered decommissioned once it is older than the
    # average expected lifetime at the scenario timestamp.
    age = scenario_timestamp - mastr_gdf.commissioning_date
    mastr_gdf = mastr_gdf.assign(
        age=age,
        end_of_life=pv_rooftop_lifetime < age,
    )

    active_cap = mastr_gdf.loc[~mastr_gdf.end_of_life].capacity.sum()

    logger.debug(
        f"Determined if pv rooftop systems reached their end of life.\nTotal "
        f"capacity: {total_cap}\nActive capacity: {active_cap}"
    )

    return mastr_gdf
1199
1200
1201
def calculate_max_pv_cap_per_building(
1202
    buildings_gdf: gpd.GeoDataFrame,
1203
    mastr_gdf: gpd.GeoDataFrame,
1204
    pv_cap_per_sq_m: float | int,
1205
    roof_factor: float | int,
1206
) -> gpd.GeoDataFrame:
1207
    """
1208
    Calculate the estimated maximum possible PV capacity per building.
1209
1210
    Parameters
1211
    -----------
1212
    buildings_gdf : geopandas.GeoDataFrame
1213
        GeoDataFrame containing OSM buildings data.
1214
    mastr_gdf : geopandas.GeoDataFrame
1215
        GeoDataFrame containing geocoded MaStR data.
1216
    pv_cap_per_sq_m : float, int
1217
        Average expected, installable PV capacity per square meter.
1218
    roof_factor : float, int
1219
        Average for PV usable roof area share.
1220
    Returns
1221
    -------
1222
    geopandas.GeoDataFrame
1223
        GeoDataFrame containing OSM buildings data with estimated maximum PV
1224
        capacity.
1225
    """
1226
    gdf = (
1227
        buildings_gdf.reset_index()
1228
        .rename(columns={"index": "id"})
1229
        .merge(
1230
            mastr_gdf[
1231
                [
1232
                    "capacity",
1233
                    "end_of_life",
1234
                    "building_id",
1235
                    "orientation_uniform",
1236
                    "orientation_primary",
1237
                    "orientation_primary_angle",
1238
                ]
1239
            ],
1240
            how="left",
1241
            left_on="id",
1242
            right_on="building_id",
1243
        )
1244
        .set_index("id")
1245
        .drop(columns="building_id")
1246
    )
1247
1248
    return gdf.assign(
1249
        max_cap=gdf.building_area.multiply(roof_factor * pv_cap_per_sq_m),
1250
        end_of_life=gdf.end_of_life.fillna(True).astype(bool),
1251
        bus_id=gdf.bus_id.astype(int),
1252
    )
1253
1254
1255
def calculate_building_load_factor(
1256
    mastr_gdf: gpd.GeoDataFrame,
1257
    buildings_gdf: gpd.GeoDataFrame,
1258
    rounding: int = 4,
1259
) -> gpd.GeoDataFrame:
1260
    """
1261
    Calculate the roof load factor from existing PV systems.
1262
    Parameters
1263
    -----------
1264
    mastr_gdf : geopandas.GeoDataFrame
1265
        GeoDataFrame containing geocoded MaStR data.
1266
    buildings_gdf : geopandas.GeoDataFrame
1267
        GeoDataFrame containing OSM buildings data.
1268
    rounding : int
1269
        Rounding to use for load factor.
1270
    Returns
1271
    -------
1272
    geopandas.GeoDataFrame
1273
        GeoDataFrame containing geocoded MaStR data with calculated load
1274
        factor.
1275
    """
1276
    gdf = mastr_gdf.merge(
1277
        buildings_gdf[["max_cap", "building_area"]]
1278
        .loc[~buildings_gdf["max_cap"].isna()]
1279
        .reset_index(),
1280
        how="left",
1281
        left_on="building_id",
1282
        right_on="id",
1283
    ).set_index("id")
1284
1285
    return gdf.assign(load_factor=(gdf.capacity / gdf.max_cap).round(rounding))
1286
1287
1288
def get_probability_for_property(
1289
    mastr_gdf: gpd.GeoDataFrame,
1290
    cap_range: tuple[int | float, int | float],
1291
    prop: str,
1292
) -> tuple[np.array, np.array]:
1293
    """
1294
    Calculate the probability of the different options of a property of the
1295
    existing PV plants.
1296
    Parameters
1297
    -----------
1298
    mastr_gdf : geopandas.GeoDataFrame
1299
        GeoDataFrame containing geocoded MaStR data.
1300
    cap_range : tuple(int, int)
1301
        Capacity range of PV plants to look at.
1302
    prop : str
1303
        Property to calculate probabilities for. String needs to be in columns
1304
        of mastr_gdf.
1305
    Returns
1306
    -------
1307
    tuple
1308
        numpy.array
1309
            Unique values of property.
1310
        numpy.array
1311
            Probabilties per unique value.
1312
    """
1313
    cap_range_gdf = mastr_gdf.loc[
1314
        (mastr_gdf.capacity > cap_range[0])
1315
        & (mastr_gdf.capacity <= cap_range[1])
1316
    ]
1317
1318
    if prop == "load_factor":
1319
        cap_range_gdf = cap_range_gdf.loc[cap_range_gdf[prop] <= 1]
1320
1321
    count = Counter(
1322
        cap_range_gdf[prop].loc[
1323
            ~cap_range_gdf[prop].isna()
1324
            & ~cap_range_gdf[prop].isnull()
1325
            & ~(cap_range_gdf[prop] == "None")
1326
        ]
1327
    )
1328
1329
    values = np.array(list(count.keys()))
1330
    probabilities = np.fromiter(count.values(), dtype=float)
1331
    probabilities = probabilities / np.sum(probabilities)
1332
1333
    return values, probabilities
1334
1335
1336
@timer_func
def probabilities(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    properties: list[str] | None = None,
) -> dict:
    """
    Calculate the probability of the different options of properties of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    properties : list(str)
        List of properties to calculate probabilities for. Strings need to be
        in columns of mastr_gdf.
    Returns
    -------
    dict
        Dictionary with values and probabilities per capacity range.
    """
    # Default ranges: <=30 kW, 30-100 kW, >100 kW (capacities in MW).
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]
    if properties is None:
        properties = [
            "orientation_uniform",
            "orientation_primary",
            "orientation_primary_angle",
            "load_factor",
        ]

    prob_dict = {}

    for cap_range in cap_ranges:
        values_per_prop = {}
        probabilities_per_prop = {}

        for prop in properties:
            prop_values, prop_probs = get_probability_for_property(
                mastr_gdf,
                cap_range,
                prop,
            )

            values_per_prop[prop] = prop_values
            probabilities_per_prop[prop] = prop_probs

        prob_dict[cap_range] = {
            "values": values_per_prop,
            "probabilities": probabilities_per_prop,
        }

    return prob_dict
1393
1394
1395
def cap_share_per_cap_range(
1396
    mastr_gdf: gpd.GeoDataFrame,
1397
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1398
) -> dict[tuple[int | float, int | float], float]:
1399
    """
1400
    Calculate the share of PV capacity from the total PV capacity within
1401
    capacity ranges.
1402
1403
    Parameters
1404
    -----------
1405
    mastr_gdf : geopandas.GeoDataFrame
1406
        GeoDataFrame containing geocoded MaStR data.
1407
    cap_ranges : list(tuple(int, int))
1408
        List of capacity ranges to distinguish between. The first tuple should
1409
        start with a zero and the last one should end with infinite.
1410
    Returns
1411
    -------
1412
    dict
1413
        Dictionary with share of PV capacity from the total PV capacity within
1414
        capacity ranges.
1415
    """
1416
    if cap_ranges is None:
1417
        cap_ranges = [
1418
            (0, 30 / 10**3),
1419
            (30 / 10**3, 100 / 10**3),
1420
            (100 / 10**3, float("inf")),
1421
        ]
1422
1423
    cap_share_dict = {}
1424
1425
    total_cap = mastr_gdf.capacity.sum()
1426
1427
    for cap_range in cap_ranges:
1428
        cap_share = (
1429
            mastr_gdf.loc[
1430
                (mastr_gdf.capacity > cap_range[0])
1431
                & (mastr_gdf.capacity <= cap_range[1])
1432
            ].capacity.sum()
1433
            / total_cap
1434
        )
1435
1436
        cap_share_dict[cap_range] = cap_share
1437
1438
    return cap_share_dict
1439
1440
1441
def mean_load_factor_per_cap_range(
1442
    mastr_gdf: gpd.GeoDataFrame,
1443
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1444
) -> dict[tuple[int | float, int | float], float]:
1445
    """
1446
    Calculate the mean roof load factor per capacity range from existing PV
1447
    plants.
1448
    Parameters
1449
    -----------
1450
    mastr_gdf : geopandas.GeoDataFrame
1451
        GeoDataFrame containing geocoded MaStR data.
1452
    cap_ranges : list(tuple(int, int))
1453
        List of capacity ranges to distinguish between. The first tuple should
1454
        start with a zero and the last one should end with infinite.
1455
    Returns
1456
    -------
1457
    dict
1458
        Dictionary with mean roof load factor per capacity range.
1459
    """
1460
    if cap_ranges is None:
1461
        cap_ranges = [
1462
            (0, 30 / 10**3),
1463
            (30 / 10**3, 100 / 10**3),
1464
            (100 / 10**3, float("inf")),
1465
        ]
1466
1467
    load_factor_dict = {}
1468
1469
    for cap_range in cap_ranges:
1470
        load_factor = mastr_gdf.loc[
1471
            (mastr_gdf.load_factor <= 1)
1472
            & (mastr_gdf.capacity > cap_range[0])
1473
            & (mastr_gdf.capacity <= cap_range[1])
1474
        ].load_factor.mean()
1475
1476
        load_factor_dict[cap_range] = load_factor
1477
1478
    return load_factor_dict
1479
1480
1481
def building_area_range_per_cap_range(
1482
    mastr_gdf: gpd.GeoDataFrame,
1483
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1484
    min_building_size: int | float = 10.0,
1485
    upper_quantile: float = 0.95,
1486
    lower_quantile: float = 0.05,
1487
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
1488
    """
1489
    Estimate normal building area range per capacity range.
1490
    Calculate the mean roof load factor per capacity range from existing PV
1491
    plants.
1492
    Parameters
1493
    -----------
1494
    mastr_gdf : geopandas.GeoDataFrame
1495
        GeoDataFrame containing geocoded MaStR data.
1496
    cap_ranges : list(tuple(int, int))
1497
        List of capacity ranges to distinguish between. The first tuple should
1498
        start with a zero and the last one should end with infinite.
1499
    min_building_size : int, float
1500
        Minimal building size to consider for PV plants.
1501
    upper_quantile : float
1502
        Upper quantile to estimate maximum building size per capacity range.
1503
    lower_quantile : float
1504
        Lower quantile to estimate minimum building size per capacity range.
1505
    Returns
1506
    -------
1507
    dict
1508
        Dictionary with estimated normal building area range per capacity
1509
        range.
1510
    """
1511
    if cap_ranges is None:
1512
        cap_ranges = [
1513
            (0, 30 / 10**3),
1514
            (30 / 10**3, 100 / 10**3),
1515
            (100 / 10**3, float("inf")),
1516
        ]
1517
1518
    building_area_range_dict = {}
1519
1520
    n_ranges = len(cap_ranges)
1521
1522
    for count, cap_range in enumerate(cap_ranges):
1523
        cap_range_gdf = mastr_gdf.loc[
1524
            (mastr_gdf.capacity > cap_range[0])
1525
            & (mastr_gdf.capacity <= cap_range[1])
1526
        ]
1527
1528
        if count == 0:
1529
            building_area_range_dict[cap_range] = (
1530
                min_building_size,
1531
                cap_range_gdf.building_area.quantile(upper_quantile),
1532
            )
1533
        elif count == n_ranges - 1:
1534
            building_area_range_dict[cap_range] = (
1535
                cap_range_gdf.building_area.quantile(lower_quantile),
1536
                float("inf"),
1537
            )
1538
        else:
1539
            building_area_range_dict[cap_range] = (
1540
                cap_range_gdf.building_area.quantile(lower_quantile),
1541
                cap_range_gdf.building_area.quantile(upper_quantile),
1542
            )
1543
1544
    values = list(building_area_range_dict.values())
1545
1546
    building_area_range_normed_dict = {}
1547
1548
    for count, (cap_range, (min_area, max_area)) in enumerate(
1549
        building_area_range_dict.items()
1550
    ):
1551
        if count == 0:
1552
            building_area_range_normed_dict[cap_range] = (
1553
                min_area,
1554
                np.mean((values[count + 1][0], max_area)),
1555
            )
1556
        elif count == n_ranges - 1:
1557
            building_area_range_normed_dict[cap_range] = (
1558
                np.mean((values[count - 1][1], min_area)),
1559
                max_area,
1560
            )
1561
        else:
1562
            building_area_range_normed_dict[cap_range] = (
1563
                np.mean((values[count - 1][1], min_area)),
1564
                np.mean((values[count + 1][0], max_area)),
1565
            )
1566
1567
    return building_area_range_normed_dict
1568
1569
1570
@timer_func
def desaggregate_pv_in_mv_grid(
    buildings_gdf: gpd.GeoDataFrame,
    pv_cap: float | int,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing buildings within the grid district.
    pv_cap : float, int
        PV capacity to desaggregate.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    bus_id = int(buildings_gdf.bus_id.iat[0])

    rng = default_rng(seed=kwargs["seed"])
    random_state = RandomState(seed=kwargs["seed"])

    results_df = pd.DataFrame(columns=buildings_gdf.columns)

    for cap_range, share in kwargs["cap_share_dict"].items():
        # Capacity to allocate within this capacity range.
        pv_cap_range = pv_cap * share

        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]

        # Candidate buildings: not yet used and with a roof area matching
        # this capacity range.
        cap_range_buildings_gdf = buildings_gdf.loc[
            ~buildings_gdf.index.isin(results_df.index)
            & (buildings_gdf.building_area > b_area_min)
            & (buildings_gdf.building_area <= b_area_max)
        ]

        mean_load_factor = kwargs["load_factor_dict"][cap_range]
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
            load_factor=np.nan,
            capacity=np.nan,
        )

        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        if total_mean_cap == 0:
            logger.warning(
                f"There are no matching roof for capacity range {cap_range} "
                f"kW in grid {bus_id}. Using all buildings as fallback."
            )

            # Fallback 1: ignore the roof area restriction.
            cap_range_buildings_gdf = buildings_gdf.loc[
                ~buildings_gdf.index.isin(results_df.index)
            ]

            if len(cap_range_buildings_gdf) == 0:
                logger.warning(
                    "There are no roofes available for capacity range "
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
                )
                # Fallback 2: allow buildings to be used more than once.
                cap_range_buildings_gdf = buildings_gdf.copy()

            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
                load_factor=np.nan,
                capacity=np.nan,
            )

            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        elif total_mean_cap < pv_cap_range:
            logger.warning(
                f"Average roof utilization of the roof area in grid {bus_id} "
                f"and capacity range {cap_range} kW is not sufficient. The "
                "roof utilization will be above average."
            )

        # Sample enough buildings to cover the target capacity on average,
        # but always at least one building.
        frac = max(
            pv_cap_range / total_mean_cap,
            1 / len(cap_range_buildings_gdf),
        )

        samples_gdf = cap_range_buildings_gdf.sample(
            frac=min(1, frac),
            random_state=random_state,
        )

        cap_range_dict = kwargs["prob_dict"][cap_range]

        values_dict = cap_range_dict["values"]
        p_dict = cap_range_dict["probabilities"]

        # Draw a load factor per sampled building from the distribution
        # observed for existing plants in this capacity range.
        load_factors = rng.choice(
            a=values_dict["load_factor"],
            size=len(samples_gdf),
            p=p_dict["load_factor"],
        )

        samples_gdf = samples_gdf.assign(
            load_factor=load_factors,
            capacity=(
                samples_gdf.building_area
                * load_factors
                * kwargs["pv_cap_per_sq_m"]
            ).clip(lower=0.4),
        )

        # Scale the sampled plants so the range target is met exactly.
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()

        samples_gdf = samples_gdf.assign(
            capacity=(samples_gdf.capacity * missing_factor),
            load_factor=(samples_gdf.load_factor * missing_factor),
        )

        assert np.isclose(
            samples_gdf.capacity.sum(),
            pv_cap_range,
            rtol=1e-03,
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"

        results_df = pd.concat(
            [
                results_df,
                samples_gdf,
            ],
        )

    # Rescale all allocations so the overall grid target is met exactly.
    total_missing_factor = pv_cap / results_df.capacity.sum()

    results_df = results_df.assign(
        capacity=(results_df.capacity * total_missing_factor),
    )

    assert np.isclose(
        results_df.capacity.sum(),
        pv_cap,
        rtol=1e-03,
    ), f"{results_df.capacity.sum()} != {pv_cap}"

    # Fix: derive the CRS from the input GeoDataFrame. The previous
    # implementation used `samples_gdf.crs`, which is undefined when
    # `cap_share_dict` is empty and the loop body never runs.
    return gpd.GeoDataFrame(
        results_df,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1732
1733
1734
@timer_func
def desaggregate_pv(
    buildings_gdf: gpd.GeoDataFrame,
    cap_df: pd.DataFrame,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    cap_df : pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    # Start with buildings that already host an active (not end-of-life)
    # PV system; newly allocated plants are appended per grid.
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]

    building_bus_ids = set(buildings_gdf.bus_id)
    cap_bus_ids = set(cap_df.index)

    logger.debug(
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from "
        f"capacity: {len(cap_bus_ids)}"
    )

    if len(building_bus_ids) > len(cap_bus_ids):
        missing = building_bus_ids - cap_bus_ids
    else:
        missing = cap_bus_ids - building_bus_ids

    logger.debug(str(missing))

    # Only grids present in both datasets can be processed.
    bus_ids = np.intersect1d(list(building_bus_ids), list(cap_bus_ids))

    for bus_id in bus_ids:
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]

        pv_installed_gdf = buildings_grid_gdf.loc[
            ~buildings_grid_gdf.end_of_life
        ]

        pv_installed = pv_installed_gdf.capacity.sum()

        pot_buildings_gdf = buildings_grid_gdf.drop(
            index=pv_installed_gdf.index
        )

        if len(pot_buildings_gdf) == 0:
            logger.error(
                f"In grid {bus_id} there are no potential buildings to "
                f"allocate PV capacity to. The grid is skipped. This message "
                f"should only appear doing test runs with few buildings."
            )

            continue

        pv_target = cap_df.at[bus_id, "capacity"]

        logger.debug(f"pv_target: {pv_target}")

        pv_missing = pv_target - pv_installed

        if pv_missing <= 0:
            logger.warning(
                f"In grid {bus_id} there is more PV installed "
                f"({pv_installed: g} kW) in status Quo than allocated within "
                f"the scenario ({pv_target: g} kW). "
                f"No new generators are added."
            )

            continue

        if pot_buildings_gdf.max_cap.sum() < pv_missing:
            logger.error(
                f"In grid {bus_id} there is less PV potential ("
                f"{pot_buildings_gdf.max_cap.sum():g} MW) than allocated PV "
                f"capacity ({pv_missing:g} MW). The average roof utilization "
                f"will be very high."
            )

        gdf = desaggregate_pv_in_mv_grid(
            buildings_gdf=pot_buildings_gdf,
            pv_cap=pv_missing,
            **kwargs,
        )

        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
        logger.debug(
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
        )

        if not np.isclose(
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
        ):
            logger.warning(
                f"The desired capacity and actual capacity in grid {bus_id} "
                f"differ.\n"
                f"Desired cap: {pv_target}\nActual cap: "
                f"{gdf.capacity.sum() + pv_installed}"
            )

        pre_cap = allocated_buildings_gdf.capacity.sum()
        new_cap = gdf.capacity.sum()

        allocated_buildings_gdf = pd.concat(
            [
                allocated_buildings_gdf,
                gdf,
            ]
        )

        total_cap = allocated_buildings_gdf.capacity.sum()

        # Sanity check: concatenation must not change total capacity.
        assert np.isclose(pre_cap + new_cap, total_cap)

    logger.debug("Desaggregated scenario.")
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
    logger.debug(
        f"Generator capacity: " f"{allocated_buildings_gdf.capacity.sum(): g}"
    )

    # Fix: derive the CRS from the input GeoDataFrame. The previous
    # implementation used `gdf.crs`, which is undefined when every grid is
    # skipped or no common bus IDs exist.
    return gpd.GeoDataFrame(
        allocated_buildings_gdf,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1885
1886
1887
@timer_func
def add_buildings_meta_data(
    buildings_gdf: gpd.GeoDataFrame,
    prob_dict: dict,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Randomly add additional metadata to desaggregated PV plants.

    Missing property values (NaN or the string "None") are filled by
    drawing from the probability distributions observed for existing
    plants in the matching capacity range.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    rng = default_rng(seed=seed)

    buildings_gdf = buildings_gdf.reset_index().rename(
        columns={
            "index": "building_id",
        }
    )

    for (min_cap, max_cap), cap_range_prob_dict in prob_dict.items():
        # Plants whose capacity falls into this capacity range.
        in_range = (buildings_gdf.capacity >= min_cap) & (
            buildings_gdf.capacity < max_cap
        )
        cap_range_gdf = buildings_gdf.loc[in_range]

        for prop, choices in cap_range_prob_dict["values"].items():
            # Load factors were already assigned during desaggregation.
            if prop == "load_factor":
                continue

            # Rows where this property is missing or a "None" placeholder.
            missing_gdf = cap_range_gdf.loc[
                cap_range_gdf[prop].isna()
                | cap_range_gdf[prop].isnull()
                | (cap_range_gdf[prop] == "None")
            ]

            drawn_vals = rng.choice(
                a=choices,
                size=len(missing_gdf),
                p=cap_range_prob_dict["probabilities"][prop],
            )

            buildings_gdf.loc[missing_gdf.index, prop] = drawn_vals

    return buildings_gdf
1942
1943
1944
def add_commissioning_date(
    buildings_gdf: gpd.GeoDataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    seed: int,
):
    """
    Draw a commissioning date for every new PV generator.

    Dates are sampled uniformly from the daily range between `start` and
    `end` (both inclusive).

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    start : pandas.Timestamp
        Minimum Timestamp to use.
    end : pandas.Timestamp
        Maximum Timestamp to use.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with commissioning date
        added.
    """
    # one candidate date per day in [start, end]
    candidate_days = pd.date_range(start=start, end=end, freq="1D")

    sampled_dates = default_rng(seed=seed).choice(
        candidate_days, size=len(buildings_gdf)
    )

    return buildings_gdf.assign(commissioning_date=sampled_dates)
1975
1976
1977
@timer_func
def allocate_scenarios(
    mastr_gdf: gpd.GeoDataFrame,
    valid_buildings_gdf: gpd.GeoDataFrame,
    last_scenario_gdf: gpd.GeoDataFrame,
    scenario: str,
):
    """
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.

    Parameters
    ----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    valid_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    last_scenario_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings matched with pv generators
        from temporally preceding scenario.
    scenario : str
        Scenario to desaggregate and allocate.

    Returns
    -------
    tuple
        geopandas.GeoDataFrame
            GeoDataFrame containing OSM buildings matched with pv
            generators.
        pandas.DataFrame
            DataFrame containing pv rooftop capacity per grid id.
    """
    # target PV rooftop capacity per MV grid district for this scenario
    cap_per_bus_id_df = cap_per_bus_id(scenario)

    logger.debug(
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
    )

    # retire generators that reach end of life before the scenario year
    last_scenario_gdf = determine_end_of_life_gens(
        last_scenario_gdf,
        SCENARIO_TIMESTAMP[scenario],
        PV_ROOFTOP_LIFETIME,
    )

    # upper bound of installable PV capacity per building
    buildings_gdf = calculate_max_pv_cap_per_building(
        valid_buildings_gdf,
        last_scenario_gdf,
        PV_CAP_PER_SQ_M,
        ROOF_FACTOR,
    )

    mastr_gdf = calculate_building_load_factor(mastr_gdf, buildings_gdf)

    # empirical distributions derived from MaStR data per capacity range
    probabilities_dict = probabilities(mastr_gdf, cap_ranges=CAP_RANGES)

    cap_share_dict = cap_share_per_cap_range(mastr_gdf, cap_ranges=CAP_RANGES)

    load_factor_dict = mean_load_factor_per_cap_range(
        mastr_gdf, cap_ranges=CAP_RANGES
    )

    building_area_range_dict = building_area_range_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
        min_building_size=MIN_BUILDING_SIZE,
        upper_quantile=UPPER_QUANTILE,
        lower_quantile=LOWER_QUANTILE,
    )

    # distribute the scenario capacity onto individual buildings
    allocated_buildings_gdf = desaggregate_pv(
        buildings_gdf=buildings_gdf,
        cap_df=cap_per_bus_id_df,
        prob_dict=probabilities_dict,
        cap_share_dict=cap_share_dict,
        building_area_range_dict=building_area_range_dict,
        load_factor_dict=load_factor_dict,
        seed=SEED,
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
    ).assign(scenario=scenario)

    # fill missing metadata fields and cast columns to numeric dtypes
    meta_buildings_gdf = frame_to_numeric(
        add_buildings_meta_data(
            allocated_buildings_gdf,
            probabilities_dict,
            SEED,
        )
    )

    # commissioning dates lie between the preceding scenario's newest
    # generator and this scenario's timestamp
    dated_buildings_gdf = add_commissioning_date(
        meta_buildings_gdf,
        start=last_scenario_gdf.commissioning_date.max(),
        end=SCENARIO_TIMESTAMP[scenario],
        seed=SEED,
    )

    return dated_buildings_gdf, cap_per_bus_id_df
2082
2083
2084
class EgonPowerPlantPvRoofBuilding(Base):
    """
    Class definition of table supply.egon_power_plants_pv_roof_building.

    One row per PV rooftop unit allocated to a building and scenario,
    written by :func:`create_scenario_table`.
    """

    __tablename__ = "egon_power_plants_pv_roof_building"
    __table_args__ = {"schema": "supply"}

    # surrogate primary key
    index = Column(Integer, primary_key=True, index=True)
    # scenario label, e.g. "status2023" or a future scenario name
    scenario = Column(String)
    # MV grid district bus; nullable — filled by add_bus_ids_sq for
    # status-quo units, by the desaggregation otherwise
    bus_id = Column(Integer, nullable=True)
    # OSM/synthetic building the unit is allocated to
    building_id = Column(Integer)
    # MaStR generator id; nullable for synthetically generated units
    gens_id = Column(String, nullable=True)
    # installed capacity (log output elsewhere reports capacities in MW)
    capacity = Column(Float)
    orientation_uniform = Column(Float)
    orientation_primary = Column(String)
    orientation_primary_angle = Column(String)
    # grid voltage level 1..7; may be inferred from capacity, see
    # infer_voltage_level
    voltage_level = Column(Integer)
    # ERA5 weather cell the building lies in
    weather_cell_id = Column(Integer)
2103
2104
2105
def add_metadata():
    """
    Compile OEP metadata for supply.egon_power_plants_pv_roof_building and
    attach it to the table as a database comment.
    """
    schema = "supply"
    table = "egon_power_plants_pv_roof_building"
    name = f"{schema}.{table}"
    # zenodo deposit ids of the two external source datasets
    deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"]
    deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][
        "zenodo"
    ]["deposit_id"]

    # same contributor in two roles: metadata author and dataset author
    contris = contributors(["kh", "kh"])

    contris[0]["date"] = "2023-03-16"

    contris[0]["object"] = "metadata"
    contris[1]["object"] = "dataset"

    contris[0]["comment"] = "Add metadata to dataset."
    contris[1]["comment"] = "Add workflow to generate dataset."

    # OEP metadata document (oemetadata schema)
    meta = {
        "name": name,
        "title": "eGon power plants rooftop solar",
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": (
            "eGon power plants rooftop solar systems allocated to buildings"
        ),
        "language": "en-US",
        "keywords": ["photovoltaik", "solar", "pv", "mastr", "status quo"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": "none",
            "extent": "Germany",
            "resolution": "building",
        },
        "temporal": {
            "referenceDate": (
                config.datasets()["mastr_new"]["egon2021_date_max"].split(" ")[
                    0
                ]
            ),
            "timeseries": {},
        },
        "sources": [
            {
                "title": "Data bundle for egon-data",
                "description": (
                    "Data bundle for egon-data: A transparent and "
                    "reproducible data processing pipeline for energy "
                    "system modeling"
                ),
                "path": (
                    "https://zenodo.org/record/"
                    f"{deposit_id_data_bundle}#.Y_dWM4CZMVM"
                ),
                "licenses": [license_dedl(attribution="© Cußmann, Ilka")],
            },
            {
                "title": ("open-MaStR power unit registry for eGo^n project"),
                "description": (
                    "Data from Marktstammdatenregister (MaStR) data using "
                    "the data dump from 2022-11-17 for eGon-data."
                ),
                "path": (
                    f"https://zenodo.org/record/{deposit_id_mastr}"
                ),
                "licenses": [license_dedl(attribution="© Amme, Jonathan")],
            },
            sources()["openstreetmap"],
            sources()["era5"],
            sources()["vg250"],
            sources()["egon-data"],
        ],
        "licenses": [license_odbl("© eGon development team")],
        "contributors": contris,
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": name,
                "path": "None",
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    # column descriptions are generated from the live table
                    "fields": generate_resource_fields_from_db_table(
                        schema,
                        table,
                    ),
                    "primaryKey": "index",
                },
                "dialect": {"delimiter": "", "decimalSeparator": ""},
            }
        ],
        "review": {"path": "", "badge": ""},
        "metaMetadata": meta_metadata(),
        "_comment": {
            "metadata": (
                "Metadata documentation and explanation (https://github."
                "com/OpenEnergyPlatform/oemetadata/blob/master/metadata/"
                "v141/metadata_key_description.md)"
            ),
            "dates": (
                "Dates and time must follow the ISO8601 including time "
                "zone (YYYY-MM-DD or YYYY-MM-DDThh:mm:ss±hh)"
            ),
            "units": "Use a space between numbers and units (100 m)",
            "languages": (
                "Languages must follow the IETF (BCP47) format (en-GB, "
                "en-US, de-DE)"
            ),
            "licenses": (
                "License name must follow the SPDX License List "
                "(https://spdx.org/licenses/)"
            ),
            "review": (
                "Following the OEP Data Review (https://github.com/"
                "OpenEnergyPlatform/data-preprocessing/wiki)"
            ),
            "none": "If not applicable use (none)",
        },
    }

    # validate/normalize against the matching oemetadata dialect version
    dialect = get_dialect(f"oep-v{meta_metadata()['metadataVersion'][4:7]}")()

    meta = dialect.compile_and_render(dialect.parse(json.dumps(meta)))

    # store the rendered document as a COMMENT on the table
    db.submit_comment(
        f"'{json.dumps(meta)}'",
        schema,
        table,
    )
2235
2236
2237
def create_scenario_table(buildings_gdf):
    """Create mapping table pv_unit <-> building for scenario.

    Drops and recreates supply.egon_power_plants_pv_roof_building, writes
    the export columns of `buildings_gdf` into it and attaches the OEP
    metadata.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    """
    # recreate the target table from scratch
    EgonPowerPlantPvRoofBuilding.__table__.drop(bind=engine, checkfirst=True)
    EgonPowerPlantPvRoofBuilding.__table__.create(bind=engine, checkfirst=True)

    # reuse the module-level engine instead of instantiating a second one
    # via db.engine() — consistent with the drop/create calls above
    buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql(
        name=EgonPowerPlantPvRoofBuilding.__table__.name,
        schema=EgonPowerPlantPvRoofBuilding.__table__.schema,
        con=engine,
        if_exists="append",
        index=False,
    )

    add_metadata()
2251
2252
2253
def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Attach an ERA5 weather cell id to every building.

    The lookup is chained: building -> zensus population cell -> weather
    cell, using the two mapping tables in the ``boundaries`` schema.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        Buildings with a ``building_id`` column.

    Returns
    -------
    geopandas.GeoDataFrame
        Input frame with ``zensus_population_id`` and ``weather_cell_id``
        columns added.

    Raises
    ------
    ValueError
        If at least one building could not be matched to a weather cell.
    """
    building_to_zensus_sql = """
    SELECT building_id, zensus_population_id
    FROM boundaries.egon_map_zensus_mvgd_buildings
    """

    zensus_to_weather_sql = """
    SELECT zensus_population_id, w_id as weather_cell_id
    FROM boundaries.egon_map_zensus_weather_cell
    """

    # step 1: building -> zensus cell
    buildings_gdf = buildings_gdf.merge(
        right=db.select_dataframe(building_to_zensus_sql).drop_duplicates(
            subset="building_id"
        ),
        how="left",
        on="building_id",
    )

    # step 2: zensus cell -> weather cell
    buildings_gdf = buildings_gdf.merge(
        right=db.select_dataframe(zensus_to_weather_sql).drop_duplicates(
            subset="zensus_population_id"
        ),
        how="left",
        on="zensus_population_id",
    )

    unmatched_mask = buildings_gdf.weather_cell_id.isna()

    if unmatched_mask.any():
        missing = buildings_gdf.loc[unmatched_mask, "building_id"].tolist()

        raise ValueError(
            f"Following buildings don't have a weather cell id: {missing}"
        )

    return buildings_gdf
2288
2289
2290
def add_bus_ids_sq(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """Add bus ids for status_quo units

    Spatially joins status-quo buildings with the MV grid districts and
    writes the matched district id into ``bus_id``.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with bus_id per
        generator.
    """
    grid_districts_gdf = grid_districts(EPSG)

    # BUGFIX: the pipeline labels status-quo rows with names like
    # "status2023" (see pv_rooftop_to_buildings), so the former literal
    # comparison `scenario == "status_quo"` never matched any row and no
    # bus id was ever assigned. Match any status* scenario instead.
    mask = buildings_gdf.scenario.str.startswith("status")

    buildings_gdf.loc[mask, "bus_id"] = (
        buildings_gdf.loc[mask]
        .sjoin(grid_districts_gdf, how="left")
        .index_right
    )

    return buildings_gdf
2317
2318
2319 View Code Duplication
def infer_voltage_level(
    units_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Infer nan values in voltage level derived from generator capacity to
    the power plants.

    Rows whose ``voltage_level`` is NaN get a level assigned from their
    ``capacity``; the boolean ``voltage_level_inferred`` column marks
    which rows were filled this way.

    Parameters
    -----------
    units_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing units with voltage levels from MaStR.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing units all having assigned a voltage level.
    """

    def _level_from_capacity(capacity: float) -> int:
        # ascending capacity thresholds -> voltage level; anything above
        # 120 maps to level 1 (note: level 2 is never produced here)
        for limit, level in ((0.1, 7), (0.2, 6), (5.5, 5), (20, 4), (120, 3)):
            if capacity <= limit:
                return level
        return 1

    units_gdf["voltage_level_inferred"] = False
    missing = units_gdf.voltage_level.isna()
    units_gdf.loc[missing, "voltage_level_inferred"] = True
    units_gdf.loc[missing, "voltage_level"] = units_gdf.loc[
        missing
    ].capacity.apply(_level_from_capacity)

    return units_gdf
2357
2358
2359
def pv_rooftop_to_buildings():
    """Main script, executed as task.

    Loads MaStR PV units and building data, allocates the status-quo
    units to buildings and then extends the result scenario by scenario:
    ``status*`` scenarios are derived by filtering on commissioning date,
    all other scenarios are desaggregated via :func:`allocate_scenarios`.
    Finally weather cell ids, status-quo bus ids and voltage levels are
    added and the result is exported to the database.
    """

    mastr_gdf = load_mastr_data()

    status_quo = "status2023"

    # commissioning cut-off date of the status-quo scenario
    ts = pd.Timestamp(
        config.datasets()["mastr_new"][f"{status_quo}_date_max"], tz="UTC"
    )

    # keep only units commissioned up to the status-quo cut-off
    mastr_gdf = mastr_gdf.loc[
        mastr_gdf.commissioning_date <= ts
    ]

    buildings_gdf = load_building_data()

    desagg_mastr_gdf, desagg_buildings_gdf = allocate_to_buildings(
        mastr_gdf, buildings_gdf
    )

    # the status-quo allocation is the base all scenarios build upon
    all_buildings_gdf = (
        desagg_mastr_gdf.assign(scenario=status_quo)
        .reset_index()
        .rename(columns={"geometry": "geom"})
        .set_geometry("geom")
    )

    scenario_buildings_gdf = all_buildings_gdf.copy()

    cap_per_bus_id_df = pd.DataFrame()

    for scenario in SCENARIOS:
        if scenario == status_quo:
            # already contained in the base allocation
            continue
        elif "status" in scenario:
            # other status scenarios: filter the preceding allocation by
            # that scenario's commissioning cut-off date
            ts = pd.Timestamp(
                config.datasets()["mastr_new"][f"{scenario}_date_max"], tz="UTC"
            )

            scenario_buildings_gdf = scenario_buildings_gdf.loc[
                scenario_buildings_gdf.commissioning_date <= ts
            ]
            # NOTE(review): the scenario column is not relabeled here, so
            # these rows keep the previous scenario name when concatenated
            # below — confirm this is intended.

        else:
            # future scenarios: desaggregate the additional capacity on
            # top of the temporally preceding scenario
            logger.debug(f"Desaggregating scenario {scenario}.")

            (
                scenario_buildings_gdf,
                cap_per_bus_id_scenario_df,
            ) = allocate_scenarios(  # noqa: F841
                desagg_mastr_gdf,
                desagg_buildings_gdf,
                scenario_buildings_gdf,
                scenario,
            )

            cap_per_bus_id_df = pd.concat(
                [cap_per_bus_id_df, cap_per_bus_id_scenario_df]
            )

        # accumulate this scenario's buildings into the overall result
        all_buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [all_buildings_gdf, scenario_buildings_gdf], ignore_index=True
            ),
            crs=scenario_buildings_gdf.crs,
            geometry="geom",
        )

    # add weather cell
    all_buildings_gdf = add_weather_cell_id(all_buildings_gdf)

    # add bus IDs for status quo scenario
    all_buildings_gdf = add_bus_ids_sq(all_buildings_gdf)

    # export scenario
    create_scenario_table(infer_voltage_level(all_buildings_gdf))
2436