Passed
Push — dev ( 7cf077...0e9721 )
by
unknown
07:11 queued 04:45
created

timer_func()   A

Complexity

Conditions 1

Size

Total Lines 13
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 11
dl 0
loc 13
rs 9.85
c 0
b 0
f 0
cc 1
nop 1
1
"""
2
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate
3
new PV rooftop generators for scenarios eGon2035 and eGon100RE.
4
5
See documentation section :ref:`pv-rooftop-ref` for more information.
6
7
"""
8
from __future__ import annotations
9
10
from collections import Counter
11
from functools import wraps
12
from time import perf_counter
13
import datetime
14
import json
15
16
from geoalchemy2 import Geometry
17
from loguru import logger
18
from numpy.random import RandomState, default_rng
19
from omi.dialects import get_dialect
20
from pyproj.crs.crs import CRS
21
from sqlalchemy import BigInteger, Column, Float, Integer, String
22
from sqlalchemy.dialects.postgresql import HSTORE
23
from sqlalchemy.ext.declarative import declarative_base
24
import geopandas as gpd
25
import numpy as np
26
import pandas as pd
27
28
from egon.data import config, db
29
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
30
    OsmBuildingsSynthetic,
31
)
32
from egon.data.datasets.power_plants.mastr_db_classes import EgonPowerPlantsPv
33
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
34
from egon.data.datasets.zensus_vg250 import Vg250Gem
35
from egon.data.metadata import (
36
    context,
37
    contributors,
38
    generate_resource_fields_from_db_table,
39
    license_dedl,
40
    license_odbl,
41
    meta_metadata,
42
    meta_metadata,
43
    sources,
44
)
45
46
engine = db.engine()
47
Base = declarative_base()
48
SEED = int(config.settings()["egon-data"]["--random-seed"])
49
50
# TODO: move to yml
51
MASTR_INDEX_COL = "gens_id"
52
53
EPSG = 4326
54
SRID = 3035
55
56
# data cleaning
57
MAX_REALISTIC_PV_CAP = 23500 / 10**3
58
MIN_REALISTIC_PV_CAP = 0.1 / 10**3
59
60
# show additional logging information
61
VERBOSE = False
62
63
# Number of quantiles
64
Q = 5
65
66
# Scenario Data
67
SCENARIOS = ["eGon2035", "eGon100RE"]
68
SCENARIO_TIMESTAMP = {
69
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
70
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
71
}
72
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")
73
74
# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
75
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
76
MODUL_CAP = 0.4 / 10**3  # MWp
77
MODUL_SIZE = 1.096 * 1.754  # m²
78
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE
79
80
# Estimation of usable roof area
81
# Factor for the conversion of building area to roof area
82
# estimation mean roof pitch: 35°
83
# estimation usable roof share: 80%
84
# estimation that only the south side of the building is used for pv
85
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
86
# AREA_FACTOR = 1.221
87
# USABLE_ROOF_SHARE = 0.8
88
# SOUTH_SHARE = 0.5
89
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
90
ROOF_FACTOR = 0.5
91
92
CAP_RANGES = [
93
    (0, 30 / 10**3),
94
    (30 / 10**3, 100 / 10**3),
95
    (100 / 10**3, float("inf")),
96
]
97
98
MIN_BUILDING_SIZE = 10.0
99
UPPER_QUANTILE = 0.95
100
LOWER_QUANTILE = 0.05
101
102
COLS_TO_EXPORT = [
103
    "scenario",
104
    "bus_id",
105
    "building_id",
106
    "gens_id",
107
    "capacity",
108
    "orientation_uniform",
109
    "orientation_primary",
110
    "orientation_primary_angle",
111
    "voltage_level",
112
    "weather_cell_id",
113
]
114
115
# TODO
116
INCLUDE_SYNTHETIC_BUILDINGS = True
117
ONLY_BUILDINGS_WITH_DEMAND = True
118
TEST_RUN = False
119
120
121
def timer_func(func):
    """
    Decorator that measures and logs a function's wall-clock runtime.

    Parameters
    -----------
    func : callable
        Function to be timed.
    Returns
    -------
    callable
        Wrapped function that logs its runtime at debug level.
    """

    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock and thus
        # suited for measuring elapsed time.
        started = perf_counter()
        result = func(*args, **kwargs)
        elapsed = perf_counter() - started
        logger.debug(
            f"Function {func.__name__} took {elapsed:.4f} seconds."
        )
        return result

    return timeit_wrapper
134
135
136
@timer_func
def mastr_data(
    index_col: str | int | list[str] | list[int],
) -> gpd.GeoDataFrame:
    """
    Read MaStR data from database.

    Parameters
    -----------
    index_col : str, int or list of str or int
        Column(s) to use as the row labels of the DataFrame.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data.
    """
    # Only rooftop/facade PV units that are actually in operation are
    # relevant for the disaggregation.
    with db.session_scope() as session:
        query = session.query(EgonPowerPlantsPv).filter(
            EgonPowerPlantsPv.status == "InBetrieb",
            EgonPowerPlantsPv.site_type
            == ("Bauliche Anlagen (Hausdach, Gebäude und Fassade)"),
        )

    gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col=index_col
    )

    # The table's surrogate key is not needed downstream.
    gdf = gdf.drop(columns="id")

    logger.debug("MaStR data loaded.")

    return gdf
166
167
168
@timer_func
def clean_mastr_data(
    mastr_gdf: gpd.GeoDataFrame,
    max_realistic_pv_cap: int | float,
    min_realistic_pv_cap: int | float,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Clean the MaStR data from implausible data.

    * Drop MaStR ID duplicates.
    * Drop generators with implausible capacities.
    * Fill missing commissioning dates by sampling from the observed ones.

    Parameters
    -----------
    mastr_gdf : pandas.DataFrame
        DataFrame containing MaStR data.
    max_realistic_pv_cap : int or float
        Maximum capacity, which is considered to be realistic.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    """
    init_len = len(mastr_gdf)

    # drop duplicates (keeps the first occurrence of each MaStR index)
    mastr_gdf = mastr_gdf.loc[~mastr_gdf.index.duplicated()]

    # drop generators without any capacity info
    # and capacity of zero
    # and if the capacity is > 23.5 MW, because
    # Germanies largest rooftop PV is 23 MW
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
    mastr_gdf = mastr_gdf.loc[
        ~mastr_gdf.capacity.isna()
        & (mastr_gdf.capacity <= max_realistic_pv_cap)
        & (mastr_gdf.capacity > min_realistic_pv_cap)
    ]

    # get consistent start-up date
    # randomly and weighted fill missing start-up dates
    # NOTE(review): the sampling below is uniform without replacement from
    # the pool of known dates, not weighted despite the wording here and in
    # the log message — TODO confirm intended behavior.
    pool = mastr_gdf.loc[
        ~mastr_gdf.commissioning_date.isna()
    ].commissioning_date.to_numpy()

    # number of rows that still need a commissioning date
    size = len(mastr_gdf) - len(pool)

    if size > 0:
        rng = default_rng(seed=seed)

        # NOTE(review): replace=False raises ValueError if more dates are
        # missing than available in the pool — TODO confirm this cannot
        # happen with real data.
        choice = rng.choice(
            pool,
            size=size,
            replace=False,
        )

        # Row-wise assignment: the NaN-date subset is replaced by the same
        # rows with commissioning_date filled from the sampled values.
        mastr_gdf.loc[mastr_gdf.commissioning_date.isna()] = mastr_gdf.loc[
            mastr_gdf.commissioning_date.isna()
        ].assign(commissioning_date=choice)

        logger.info(
            f"Randomly and weigthed added start-up date to {size} generators."
        )

    # Normalize all commissioning dates to timezone-aware UTC timestamps.
    mastr_gdf = mastr_gdf.assign(
        commissioning_date=pd.to_datetime(
            mastr_gdf.commissioning_date, utc=True
        )
    )

    end_len = len(mastr_gdf)
    logger.debug(
        f"Dropped {init_len - end_len} "
        f"({((init_len - end_len) / init_len) * 100:g}%)"
        f" of {init_len} rows from MaStR DataFrame."
    )

    return mastr_gdf
251
252
253
@timer_func
def municipality_data() -> gpd.GeoDataFrame:
    """
    Get municipality data from eGo^n Database.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with municipality data, indexed by AGS.
    """
    with db.session_scope() as session:
        query = session.query(Vg250Gem.ags, Vg250Gem.geometry.label("geom"))

    municipalities_gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col="ags"
    )

    return municipalities_gdf
268
269
270
@timer_func
def add_ags_to_gens(
    mastr_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to generators.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with AGS ID added.
    """
    joined_gdf = mastr_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # The municipality index (the AGS) ends up in 'index_right' after the
    # spatial join; expose it under a meaningful name.
    return joined_gdf.rename(columns={"index_right": "ags"})
294
295
296
def drop_gens_outside_muns(
    mastr_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all generators outside of municipalities.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data
        with generators without an AGS ID dropped.
    """
    # A missing AGS means the spatial join found no containing municipality.
    inside_gdf = mastr_gdf.loc[~mastr_gdf.ags.isna()]

    total = len(mastr_gdf)
    dropped = total - len(inside_gdf)

    logger.debug(
        f"{dropped} ("
        f"{dropped / total * 100:g}%)"
        f" of {total} values are outside of the municipalities"
        " and are therefore dropped."
    )

    return inside_gdf
321
322
323
def load_mastr_data():
    """Read PV rooftop data from MaStR CSV

    Note: the source will be replaced as soon as the MaStR data is available
    in DB.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    """
    raw_mastr_gdf = mastr_data(MASTR_INDEX_COL)

    cleaned_mastr_gdf = clean_mastr_data(
        raw_mastr_gdf,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
    )

    municipalities_gdf = municipality_data()

    # Tag every generator with the AGS of the municipality it lies in and
    # discard generators that could not be matched to any municipality.
    tagged_mastr_gdf = add_ags_to_gens(cleaned_mastr_gdf, municipalities_gdf)

    return drop_gens_outside_muns(tagged_mastr_gdf)
348
349
350
class OsmBuildingsFiltered(Base):
    """
    Class definition of table openstreetmap.osm_buildings_filtered.

    """
    __tablename__ = "osm_buildings_filtered"
    __table_args__ = {"schema": "openstreetmap"}

    # OSM identifier of the building way/relation
    osm_id = Column(BigInteger)
    # raw OSM tag values
    amenity = Column(String)
    building = Column(String)
    name = Column(String)
    # building footprint polygon (SRID 3035, see module constant)
    geom = Column(Geometry(srid=SRID), index=True)
    # footprint area; unit not stated here — presumably m², verify in ETL
    area = Column(Float)
    # representative point of the footprint, used for spatial joins
    geom_point = Column(Geometry(srid=SRID), index=True)
    # remaining OSM key/value tags as HSTORE
    tags = Column(HSTORE)
    # surrogate primary key
    id = Column(BigInteger, primary_key=True, index=True)
367
368
369
@timer_func
def osm_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read OSM buildings data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    """
    # Only ID, footprint area and the representative point are needed.
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsFiltered.id,
            OsmBuildingsFiltered.area,
            OsmBuildingsFiltered.geom_point.label("geom"),
        )

    buildings_gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    )

    return buildings_gdf.to_crs(to_crs)
394
395
396
@timer_func
def synthetic_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read synthetic buildings data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing synthetic buildings data.
    """
    # Only ID, footprint area and the representative point are needed.
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsSynthetic.id,
            OsmBuildingsSynthetic.area,
            OsmBuildingsSynthetic.geom_point.label("geom"),
        )

    buildings_gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    )

    return buildings_gdf.to_crs(to_crs)
421
422
423
@timer_func
def add_ags_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about AGS ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with AGS ID added.
    """
    joined_gdf = buildings_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # The municipality index (the AGS) ends up in 'index_right' after the
    # spatial join; expose it under a meaningful name.
    return joined_gdf.rename(columns={"index_right": "ags"})
447
448
449
def drop_buildings_outside_muns(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of municipalities.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without an AGS ID dropped.
    """
    # A missing AGS means the spatial join found no containing municipality.
    inside_gdf = buildings_gdf.loc[~buildings_gdf.ags.isna()]

    total = len(buildings_gdf)
    dropped = total - len(inside_gdf)

    logger.debug(
        f"{dropped} "
        f"({dropped / total * 100:g}%) "
        f"of {total} values are outside of the municipalities "
        "and are therefore dropped."
    )

    return inside_gdf
474
475
476
def egon_building_peak_loads(scenario: str = "eGon2035"):
    """
    Get the IDs of buildings that have an electricity peak load.

    Generalized from the previously hard-coded 'eGon2035' filter; the
    default preserves the original behavior for existing callers.

    Parameters
    -----------
    scenario : str
        Scenario name to filter the peak load table by (e.g. one of
        the module-level SCENARIOS). Defaults to "eGon2035".
    Returns
    -------
    numpy.ndarray
        Sorted array of unique building IDs.
    """
    # scenario comes from module-level constants, not user input, so plain
    # interpolation is acceptable here.
    sql = f"""
    SELECT building_id
    FROM demand.egon_building_electricity_peak_loads
    WHERE scenario = '{scenario}'
    """

    return (
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
    )
486
487
488
@timer_func
def load_building_data():
    """
    Read buildings from DB
    Tables:

    * `openstreetmap.osm_buildings_filtered` (from OSM)
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)

    Use column `id` for both as it is unique hence you concat both datasets.
    If INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be
    loaded.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    """

    municipalities_gdf = municipality_data()

    # Load OSM buildings in the municipalities' CRS so both align.
    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)

    if INCLUDE_SYNTHETIC_BUILDINGS:
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)

        # Concatenate OSM and synthetic buildings into one GeoDataFrame;
        # both use the unique `id` as index.
        buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [
                    osm_buildings_gdf,
                    synthetic_buildings_gdf,
                ]
            ),
            geometry="geom",
            crs=osm_buildings_gdf.crs,
        ).rename(columns={"area": "building_area"})

        buildings_gdf.index = buildings_gdf.index.astype(int)

    else:
        # NOTE(review): this branch does not cast the index to int like the
        # branch above — verify downstream int-based lookups still work.
        buildings_gdf = osm_buildings_gdf.rename(
            columns={"area": "building_area"}
        )

    if ONLY_BUILDINGS_WITH_DEMAND:
        # Restrict to buildings that have an electricity peak load.
        building_ids = egon_building_peak_loads()

        init_len = len(building_ids)

        # Keep only IDs present in both the peak-load table and the
        # buildings GeoDataFrame.
        building_ids = np.intersect1d(
            list(map(int, building_ids)),
            list(map(int, buildings_gdf.index.to_numpy())),
        )

        end_len = len(building_ids)

        logger.debug(
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) "
            f"of buildings have peak load."
        )

        buildings_gdf = buildings_gdf.loc[building_ids]

    # Attach municipality (AGS) info and drop unmatched buildings.
    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)

    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)

    # Intersect MV grid districts with federal states to build overlay IDs.
    grid_districts_gdf = grid_districts(EPSG)

    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)

    grid_federal_state_gdf = overlay_grid_districts_with_counties(
        grid_districts_gdf,
        federal_state_gdf,
    )

    buildings_overlay_gdf = add_overlay_id_to_buildings(
        buildings_ags_gdf,
        grid_federal_state_gdf,
    )

    logger.debug("Loaded buildings.")

    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)

    # overwrite bus_id with data from new table
    sql = (
        "SELECT building_id, bus_id FROM "
        "boundaries.egon_map_zensus_mvgd_buildings"
    )
    map_building_bus_df = db.select_dataframe(sql)

    # Keep only buildings present in the mapping table.
    building_ids = np.intersect1d(
        list(map(int, map_building_bus_df.building_id.unique())),
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
    )

    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]

    # Replace the overlay-derived bus_id with the authoritative mapping,
    # re-keyed on the building `id`.
    gdf = (
        buildings_within_gdf.reset_index()
        .drop(columns=["bus_id"])
        .merge(
            how="left",
            right=map_building_bus_df,
            left_on="id",
            right_on="building_id",
        )
        .drop(columns=["building_id"])
        .set_index("id")
        .sort_index()
    )

    # The merge can duplicate rows if a building maps to several buses;
    # keep the first occurrence only.
    return gdf[~gdf.index.duplicated(keep="first")]
603
604
605
@timer_func
def sort_and_qcut_df(
    df: pd.DataFrame | gpd.GeoDataFrame,
    col: str,
    q: int,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Determine the quantile of a given attribute in a (Geo)DataFrame.

    Sort the (Geo)DataFrame in ascending order for the given attribute.

    Parameters
    -----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        (Geo)DataFrame to sort and qcut.
    col : str
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
    q : int
        Number of quantiles.
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Sorted and qcut (Geo)DataFrame.
    """
    ordered_df = df.sort_values(col, ascending=True)

    # Assign each row its quantile bucket, labelled 0 .. q-1.
    quantile_labels = pd.qcut(
        ordered_df[col],
        q=q,
        labels=range(q),
    )

    return ordered_df.assign(quant=quantile_labels)
636
637
638
@timer_func
def allocate_pv(
    q_mastr_gdf: gpd.GeoDataFrame,
    q_buildings_gdf: gpd.GeoDataFrame,
    seed: int,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate the MaStR pv generators to the OSM buildings.

    This will determine a building for each pv generator if there are more
    buildings than generators within a given AGS. Primarily generators are
    distributed with the same quantile as the buildings. Multiple assignment
    is excluded.

    Parameters
    -----------
    q_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded and qcut MaStR data.
    q_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing qcut OSM buildings data.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    rng = default_rng(seed=seed)

    # Initialize the cross-reference columns; NaN marks "not allocated".
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan).sort_values(
        by=["ags", "quant"]
    )
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan).sort_values(
        by=["ags", "quant"]
    )

    ags_list = q_buildings_gdf.ags.unique()

    # Optionally restrict to a subset of municipalities for quick testing.
    if TEST_RUN:
        ags_list = ags_list[:250]

    num_ags = len(ags_list)

    t0 = perf_counter()

    # Allocate municipality by municipality.
    for count, ags in enumerate(ags_list):

        buildings = q_buildings_gdf.loc[q_buildings_gdf.ags == ags]
        gens = q_mastr_gdf.loc[q_mastr_gdf.ags == ags]

        len_build = len(buildings)
        len_gens = len(gens)

        # More generators than buildings: deterministically subsample the
        # generators so every generator can get its own building.
        if len_build < len_gens:
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
            logger.error(
                f"There are {len_gens} generators and only {len_build}"
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
                "generators were truncated to match the amount of buildings."
            )

            assert len_build == len(gens)

        # Match generators to buildings within the same size quantile.
        for quant in gens.quant.unique():
            q_buildings = buildings.loc[buildings.quant == quant]
            q_gens = gens.loc[gens.quant == quant]

            len_build = len(q_buildings)
            len_gens = len(q_gens)

            if len_build < len_gens:
                # Not enough buildings in this quantile: borrow random
                # not-yet-chosen buildings from other quantiles of the AGS.
                delta = len_gens - len_build

                logger.warning(
                    f"There are {len_gens} generators and only {len_build} "
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
                    f"buildings from AGS {ags} will be added randomly."
                )

                add_buildings = pd.Index(
                    rng.choice(
                        list(set(buildings.index) - set(q_buildings.index)),
                        size=delta,
                        replace=False,
                    )
                )

                chosen_buildings = q_buildings.index.append(add_buildings)

            else:
                # Enough buildings: pick a random subset of this quantile.
                chosen_buildings = rng.choice(
                    q_buildings.index,
                    size=len_gens,
                    replace=False,
                )

            # Record the generator on the chosen buildings and remove the
            # buildings from the pool to exclude multiple assignment.
            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index
            buildings = buildings.drop(chosen_buildings)

        # Progress logging every 500 municipalities.
        if count % 500 == 0:
            logger.debug(
                f"Allocation of {count / num_ags * 100:g} % of AGS done. "
                f"It took {perf_counter() - t0:g} seconds."
            )

            t0 = perf_counter()

    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]

    # Every assigned generator must appear on exactly one building.
    assert len(assigned_buildings) == len(assigned_buildings.gens_id.unique())

    # Write the reverse mapping (building IDs) onto the generators.
    q_mastr_gdf.loc[
        assigned_buildings.gens_id, "building_id"
    ] = assigned_buildings.index

    assigned_gens = q_mastr_gdf.loc[~q_mastr_gdf.building_id.isna()]

    assert len(assigned_buildings) == len(assigned_gens)

    logger.debug("Allocated status quo generators to buildings.")

    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
759
760
761
def frame_to_numeric(
    df: pd.DataFrame | gpd.GeoDataFrame,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Try to convert all columns of a DataFrame to numeric ignoring errors.

    Modifies *df* in place and also returns it. Object-dtype columns (and
    an object-dtype index) that cannot be parsed as numbers are left
    unchanged.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
    """

    def _to_numeric_or_keep(values):
        # pd.to_numeric(errors="ignore") is deprecated since pandas 2.2;
        # replicate its behavior explicitly: return the input unchanged
        # whenever the conversion fails.
        try:
            return pd.to_numeric(values)
        except (ValueError, TypeError):
            return values

    if str(df.index.dtype) == "object":
        df.index = _to_numeric_or_keep(df.index)

    for col in df.columns:
        if str(df[col].dtype) == "object":
            df[col] = _to_numeric_or_keep(df[col])

    return df
781
782
783
def validate_output(
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
) -> None:
    """
    Validate output.

    * Validate that there are exactly as many buildings with a pv system as
      there are pv systems with a building
    * Validate that the building IDs with a pv system are the same building
      IDs as assigned to the pv systems
    * Validate that the pv system IDs with a building are the same pv system
      IDs as assigned to the buildings

    Parameters
    -----------
    desagg_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    desagg_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    allocated_gens = desagg_mastr_gdf.loc[
        ~desagg_mastr_gdf.building_id.isna()
    ]
    allocated_buildings = desagg_buildings_gdf.loc[
        ~desagg_buildings_gdf.gens_id.isna()
    ]

    # Same number of allocated generators and allocated buildings.
    assert len(allocated_gens) == len(allocated_buildings)

    # Both sides must agree on the exact set of building IDs.
    assert (
        np.sort(allocated_gens.building_id.unique())
        == np.sort(allocated_buildings.index.unique())
    ).all()

    # Both sides must agree on the exact set of generator IDs.
    assert (
        np.sort(allocated_gens.index.unique())
        == np.sort(allocated_buildings.gens_id.unique())
    ).all()

    logger.debug("Validated output.")
833
834
835
def drop_unallocated_gens(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop generators which did not get allocated.

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with generators dropped which did
        not get allocated.
    """
    before = len(gdf)

    # A NaN building_id marks a generator without an allocated building.
    gdf = gdf.loc[~gdf.building_id.isna()]

    removed = before - len(gdf)

    logger.debug(
        f"Dropped {removed} "
        f"({(removed / before) * 100:g}%)"
        f" of {before} unallocated rows from MaStR DataFrame."
    )

    return gdf
862
863
864
@timer_func
def allocate_to_buildings(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate status quo pv rooftop generators to buildings.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    logger.debug("Starting allocation of status quo.")

    # Bucket generators by capacity and buildings by footprint area into
    # the same number of quantiles so similar sizes get matched.
    capacity_binned_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
    area_binned_gdf = sort_and_qcut_df(
        buildings_gdf, col="building_area", q=Q
    )

    allocated_mastr_gdf, allocated_buildings_gdf = allocate_pv(
        capacity_binned_gdf, area_binned_gdf, SEED
    )

    validate_output(allocated_mastr_gdf, allocated_buildings_gdf)

    return (
        drop_unallocated_gens(allocated_mastr_gdf),
        allocated_buildings_gdf,
    )
896
897
898
@timer_func
def grid_districts(
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Load mv grid district geo data from eGo^n Database as
    geopandas.GeoDataFrame.

    Parameters
    -----------
    epsg : int
        EPSG ID to use as CRS.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    """
    districts_gdf = db.select_geodataframe(
        """
        SELECT bus_id, geom
        FROM grid.egon_mv_grid_district
        ORDER BY bus_id
        """,
        index_col="bus_id",
        geom_col="geom",
        epsg=epsg,
    )

    # Force a plain int index so later intersect/merge operations on
    # bus_id behave consistently.
    districts_gdf.index = districts_gdf.index.astype(int)

    logger.debug("Grid districts loaded.")

    return districts_gdf
930
931
932
def scenario_data(
    carrier: str = "solar_rooftop",
    scenario: str = "eGon2035",
) -> pd.DataFrame:
    """
    Get scenario capacity data from eGo^n Database.

    Parameters
    -----------
    carrier : str
        Carrier type to filter table by.
    scenario : str
        Scenario to filter table by.
    Returns
    -------
    pandas.DataFrame
        DataFrame with scenario capacity data in GW.
    """
    with db.session_scope() as session:
        query = session.query(EgonScenarioCapacities).filter(
            EgonScenarioCapacities.carrier == carrier,
            EgonScenarioCapacities.scenario_name == scenario,
        )

    capacities_df = pd.read_sql(
        query.statement, query.session.bind, index_col="index"
    )
    capacities_df = capacities_df.sort_index()

    logger.debug("Scenario capacity data loaded.")

    return capacities_df
962
963
964 View Code Duplication
class Vg250Lan(Base):
    """
    Class definition of table boundaries.vg250_lan.

    """
    __tablename__ = "vg250_lan"
    __table_args__ = {"schema": "boundaries"}

    # surrogate primary key
    id = Column(BigInteger, primary_key=True, index=True)
    # VG250 administrative attributes — presumably the standard BKG VG250
    # column set; semantics not defined in this file, see BKG documentation
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    # gen: name, bez: designation of the federal state
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    # NUTS region code
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    # federal state geometry (SRID = EPSG module constant, 4326)
    geometry = Column(Geometry(srid=EPSG), index=True)
1000
1001
1002
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
    """
    Get federal state data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with federal state data (id index, NUTS code and
        geometry reprojected to ``to_crs``).
    """
    with db.session_scope() as session:
        query = session.query(
            Vg250Lan.id, Vg250Lan.nuts, Vg250Lan.geometry.label("geom")
        )

        # Read within the session scope so the DB connection is still open.
        gdf = gpd.read_postgis(
            query.statement, session.connection(), index_col="id"
        ).to_crs(to_crs)

    logger.debug("Federal State data loaded.")

    return gdf
1026
1027
1028
@timer_func
def overlay_grid_districts_with_counties(
    mv_grid_district_gdf: gpd.GeoDataFrame,
    federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Calculate the intersections of mv grid districts and counties.

    Parameters
    -----------
    mv_grid_district_gdf : gpd.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    federal_state_gdf : gpd.GeoDataFrame
        GeoDataFrame with federal state data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between mv grid districts and
        counties (federal states).
    """
    logger.debug(
        "Calculating intersection overlay between mv grid districts and "
        "counties. This may take a while..."
    )

    # Reproject the state shapes onto the grid district CRS before
    # intersecting; keep_geom_type discards lower-dimensional artefacts
    # (points/lines) the overlay may produce.
    gdf = gpd.overlay(
        federal_state_gdf.to_crs(mv_grid_district_gdf.crs),
        mv_grid_district_gdf.reset_index(),
        how="intersection",
        keep_geom_type=True,
    )

    logger.debug("Done!")

    return gdf
1061
1062
1063
@timer_func
def add_overlay_id_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    grid_federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about overlay ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    grid_federal_state_gdf : geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between counties and grid
        districts.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with overlay ID added.
    """
    # Match the buildings CRS to the overlay shapes before joining.
    reprojected_gdf = buildings_gdf.to_crs(grid_federal_state_gdf.crs)

    # Left spatial join keeps every building; unmatched rows get NaN.
    joined_gdf = reprojected_gdf.sjoin(
        grid_federal_state_gdf,
        how="left",
        predicate="intersects",
    )

    gdf = joined_gdf.rename(columns={"index_right": "overlay_id"})

    logger.debug("Added overlay ID to OSM buildings.")

    return gdf
1095
1096
1097
def drop_buildings_outside_grids(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of grid areas.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without a bus ID dropped.
    """
    # A missing bus_id means the building lies outside every grid district.
    gdf = buildings_gdf.loc[~buildings_gdf.bus_id.isna()]

    total = len(buildings_gdf)
    dropped = total - len(gdf)

    # Guard against ZeroDivisionError when an empty frame is passed in.
    share = dropped / total * 100 if total > 0 else 0.0

    logger.debug(
        f"{dropped} ({share:g}%) of {total} values are outside of the grid "
        "areas and are therefore dropped."
    )

    return gdf
1122
1123
1124
def cap_per_bus_id(
    scenario: str,
) -> pd.DataFrame:
    """
    Get table with total pv rooftop capacity per grid district.

    Parameters
    -----------
    scenario : str
        Scenario name.
    Returns
    -------
    pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid (index: bus_id),
        Slack buses excluded.
    """
    targets = config.datasets()["solar_rooftop"]["targets"]

    # NOTE(review): scenario and target names are interpolated directly into
    # the SQL string; they come from internal configuration here, but confirm
    # they are never user-supplied before reusing this pattern.
    sql = f"""
    SELECT bus as bus_id, control, p_nom as capacity
    FROM {targets['generators']['schema']}.{targets['generators']['table']}
    WHERE carrier = 'solar_rooftop'
    AND scn_name = '{scenario}'
    """

    df = db.select_dataframe(sql, index_col="bus_id")

    # Slack buses do not represent real rooftop feed-in and are excluded.
    return df.loc[df.control != "Slack"]
1151
1152
1153
def determine_end_of_life_gens(
    mastr_gdf: gpd.GeoDataFrame,
    scenario_timestamp: pd.Timestamp,
    pv_rooftop_lifetime: pd.Timedelta,
) -> gpd.GeoDataFrame:
    """
    Determine if an old PV system has reached its end of life.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    scenario_timestamp : pandas.Timestamp
        Timestamp at which the scenario takes place.
    pv_rooftop_lifetime : pandas.Timedelta
        Average expected lifetime of PV rooftop systems.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data and info if the system
        has reached its end of life.
    """
    total_cap = mastr_gdf.capacity.sum()

    # A system is end-of-life when its age at scenario time exceeds the
    # expected lifetime. The callable sees the frame with `age` added.
    mastr_gdf = mastr_gdf.assign(
        age=scenario_timestamp - mastr_gdf.commissioning_date
    ).assign(end_of_life=lambda gdf: gdf.age > pv_rooftop_lifetime)

    active_cap = mastr_gdf.loc[~mastr_gdf.end_of_life].capacity.sum()

    logger.debug(
        f"Determined if pv rooftop systems reached their end of life.\nTotal "
        f"capacity: {total_cap}\nActive capacity: {active_cap}"
    )

    return mastr_gdf
1192
1193
1194
def calculate_max_pv_cap_per_building(
    buildings_gdf: gpd.GeoDataFrame,
    mastr_gdf: gpd.GeoDataFrame,
    pv_cap_per_sq_m: float | int,
    roof_factor: float | int,
) -> gpd.GeoDataFrame:
    """
    Calculate the estimated maximum possible PV capacity per building.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    roof_factor : float, int
        Average for PV usable roof area share.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with estimated maximum PV
        capacity.
    """
    # Existing-plant attributes to attach to each building.
    mastr_cols = [
        "capacity",
        "end_of_life",
        "building_id",
        "orientation_uniform",
        "orientation_primary",
        "orientation_primary_angle",
    ]

    merged_gdf = (
        buildings_gdf.reset_index()
        .rename(columns={"index": "id"})
        .merge(
            mastr_gdf[mastr_cols],
            how="left",
            left_on="id",
            right_on="building_id",
        )
        .set_index("id")
        .drop(columns="building_id")
    )

    # Buildings without an existing plant get end_of_life=True (no active
    # generator); max_cap is roof area scaled by usable share and cap/m².
    return merged_gdf.assign(
        max_cap=merged_gdf.building_area * (roof_factor * pv_cap_per_sq_m),
        end_of_life=merged_gdf.end_of_life.fillna(True).astype(bool),
        bus_id=merged_gdf.bus_id.astype(int),
    )
1246
1247
1248
def calculate_building_load_factor(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
    rounding: int = 4,
) -> gpd.GeoDataFrame:
    """
    Calculate the roof load factor from existing PV systems.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    rounding : int
        Rounding to use for load factor.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data with calculated load
        factor.
    """
    # Only buildings with a known maximum PV capacity can yield a factor.
    potential_gdf = (
        buildings_gdf[["max_cap", "building_area"]]
        .loc[~buildings_gdf["max_cap"].isna()]
        .reset_index()
    )

    gdf = mastr_gdf.merge(
        potential_gdf,
        how="left",
        left_on="building_id",
        right_on="id",
    ).set_index("id")

    # Ratio of installed capacity to roof potential, rounded for stability.
    load_factor = (gdf.capacity / gdf.max_cap).round(rounding)

    return gdf.assign(load_factor=load_factor)
1279
1280
1281
def get_probability_for_property(
    mastr_gdf: gpd.GeoDataFrame,
    cap_range: tuple[int | float, int | float],
    prop: str,
) -> tuple[np.array, np.array]:
    """
    Calculate the probability of the different options of a property of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_range : tuple(int, int)
        Capacity range of PV plants to look at.
    prop : str
        Property to calculate probabilities for. String needs to be in columns
        of mastr_gdf.
    Returns
    -------
    tuple
        numpy.array
            Unique values of property.
        numpy.array
            Probabilities per unique value.
    """
    # Restrict to plants within the (lower, upper] capacity range.
    cap_range_gdf = mastr_gdf.loc[
        (mastr_gdf.capacity > cap_range[0])
        & (mastr_gdf.capacity <= cap_range[1])
    ]

    if prop == "load_factor":
        # Load factors > 1 are implausible (more capacity than the roof
        # allows) and are excluded from the statistics.
        cap_range_gdf = cap_range_gdf.loc[cap_range_gdf[prop] <= 1]

    # pandas.Series.isnull is an alias of isna, so a single NA check
    # suffices. The literal "None" string stems from raw MaStR entries.
    count = Counter(
        cap_range_gdf[prop].loc[
            ~cap_range_gdf[prop].isna() & (cap_range_gdf[prop] != "None")
        ]
    )

    values = np.array(list(count.keys()))
    probabilities = np.fromiter(count.values(), dtype=float)
    probabilities = probabilities / np.sum(probabilities)

    return values, probabilities
1327
1328
1329
@timer_func
def probabilities(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    properties: list[str] | None = None,
) -> dict:
    """
    Calculate the probability of the different options of properties of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    properties : list(str)
        List of properties to calculate probabilities for. Strings need to be
        in columns of mastr_gdf.
    Returns
    -------
    dict
        Dictionary with values and probabilities per capacity range.
    """
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]
    if properties is None:
        properties = [
            "orientation_uniform",
            "orientation_primary",
            "orientation_primary_angle",
            "load_factor",
        ]

    prob_dict = {}

    for cap_range in cap_ranges:
        # Collect per-property values and probabilities for this range,
        # then store them under the range key in one go.
        values_per_prop = {}
        probabilities_per_prop = {}

        for prop in properties:
            values, probs = get_probability_for_property(
                mastr_gdf,
                cap_range,
                prop,
            )

            values_per_prop[prop] = values
            probabilities_per_prop[prop] = probs

        prob_dict[cap_range] = {
            "values": values_per_prop,
            "probabilities": probabilities_per_prop,
        }

    return prob_dict
1386
1387
1388
def cap_share_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the share of PV capacity from the total PV capacity within
    capacity ranges.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    """
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    total_cap = mastr_gdf.capacity.sum()

    def range_share(bounds):
        # Share of total capacity installed within (lower, upper].
        lower, upper = bounds
        in_range = (mastr_gdf.capacity > lower) & (mastr_gdf.capacity <= upper)
        return mastr_gdf.loc[in_range].capacity.sum() / total_cap

    return {cap_range: range_share(cap_range) for cap_range in cap_ranges}
1432
1433
1434
def mean_load_factor_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the mean roof load factor per capacity range from existing PV
    plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with mean roof load factor per capacity range.
    """
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    # Implausible load factors (> 1) are excluded once, up front; NaN
    # load factors drop out of the comparison as well.
    plausible_gdf = mastr_gdf.loc[mastr_gdf.load_factor <= 1]

    load_factor_dict = {}

    for lower, upper in cap_ranges:
        in_range_gdf = plausible_gdf.loc[
            (plausible_gdf.capacity > lower)
            & (plausible_gdf.capacity <= upper)
        ]

        load_factor_dict[(lower, upper)] = in_range_gdf.load_factor.mean()

    return load_factor_dict
1472
1473
1474
def building_area_range_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    min_building_size: int | float = 10.0,
    upper_quantile: float = 0.95,
    lower_quantile: float = 0.05,
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
    """
    Estimate normal building area range per capacity range.

    For each capacity range, the typical building area interval is derived
    from quantiles of the areas of buildings that already host a PV plant
    in that range. The raw intervals are then normalized so that adjacent
    capacity ranges meet at a shared boundary.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    min_building_size : int, float
        Minimal building size to consider for PV plants.
    upper_quantile : float
        Upper quantile to estimate maximum building size per capacity range.
    lower_quantile : float
        Lower quantile to estimate minimum building size per capacity range.
    Returns
    -------
    dict
        Dictionary with estimated normal building area range per capacity
        range.
    """
    if cap_ranges is None:
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    building_area_range_dict = {}

    n_ranges = len(cap_ranges)

    # Pass 1: raw quantile-based area interval per capacity range. The
    # first range is bounded below by min_building_size, the last one is
    # open-ended above.
    for count, cap_range in enumerate(cap_ranges):
        cap_range_gdf = mastr_gdf.loc[
            (mastr_gdf.capacity > cap_range[0])
            & (mastr_gdf.capacity <= cap_range[1])
        ]

        if count == 0:
            building_area_range_dict[cap_range] = (
                min_building_size,
                cap_range_gdf.building_area.quantile(upper_quantile),
            )
        elif count == n_ranges - 1:
            building_area_range_dict[cap_range] = (
                cap_range_gdf.building_area.quantile(lower_quantile),
                float("inf"),
            )
        else:
            building_area_range_dict[cap_range] = (
                cap_range_gdf.building_area.quantile(lower_quantile),
                cap_range_gdf.building_area.quantile(upper_quantile),
            )

    values = list(building_area_range_dict.values())

    building_area_range_normed_dict = {}

    # Pass 2: normalize interval edges so that neighboring ranges share a
    # common boundary — the mean of this range's edge and the adjacent
    # range's facing edge — leaving neither gaps nor overlaps.
    # NOTE: relies on dict insertion order matching cap_ranges order.
    for count, (cap_range, (min_area, max_area)) in enumerate(
        building_area_range_dict.items()
    ):
        if count == 0:
            building_area_range_normed_dict[cap_range] = (
                min_area,
                np.mean((values[count + 1][0], max_area)),
            )
        elif count == n_ranges - 1:
            building_area_range_normed_dict[cap_range] = (
                np.mean((values[count - 1][1], min_area)),
                max_area,
            )
        else:
            building_area_range_normed_dict[cap_range] = (
                np.mean((values[count - 1][1], min_area)),
                np.mean((values[count + 1][0], max_area)),
            )

    return building_area_range_normed_dict
1561
1562
1563
@timer_func
def desaggregate_pv_in_mv_grid(
    buildings_gdf: gpd.GeoDataFrame,
    pv_cap: float | int,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing buildings within the grid district.
    pv_cap : float, int
        PV capacity to desaggregate.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    bus_id = int(buildings_gdf.bus_id.iat[0])

    rng = default_rng(seed=kwargs["seed"])
    random_state = RandomState(seed=kwargs["seed"])

    results_df = pd.DataFrame(columns=buildings_gdf.columns)

    for cap_range, share in kwargs["cap_share_dict"].items():
        # Capacity to be allocated within this capacity range.
        pv_cap_range = pv_cap * share

        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]

        # Buildings not yet used in a previous range whose roof area is
        # plausible for this capacity range.
        cap_range_buildings_gdf = buildings_gdf.loc[
            ~buildings_gdf.index.isin(results_df.index)
            & (buildings_gdf.building_area > b_area_min)
            & (buildings_gdf.building_area <= b_area_max)
        ]

        mean_load_factor = kwargs["load_factor_dict"][cap_range]
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
            load_factor=np.nan,
            capacity=np.nan,
        )

        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        if total_mean_cap == 0:
            logger.warning(
                f"There are no matching roof for capacity range {cap_range} "
                f"kW in grid {bus_id}. Using all buildings as fallback."
            )

            # Fallback 1: ignore the area constraint, use any unused
            # building in the grid district.
            cap_range_buildings_gdf = buildings_gdf.loc[
                ~buildings_gdf.index.isin(results_df.index)
            ]

            if len(cap_range_buildings_gdf) == 0:
                logger.warning(
                    "There are no roofes available for capacity range "
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
                )
                # Fallback 2: allow buildings already used in earlier ranges.
                cap_range_buildings_gdf = buildings_gdf.copy()

            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
                load_factor=np.nan,
                capacity=np.nan,
            )

            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        elif total_mean_cap < pv_cap_range:
            logger.warning(
                f"Average roof utilization of the roof area in grid {bus_id} "
                f"and capacity range {cap_range} kW is not sufficient. The "
                "roof utilization will be above average."
            )

        # Sample enough buildings to cover the range target at the mean
        # load factor; at least one building is always sampled.
        frac = max(
            pv_cap_range / total_mean_cap,
            1 / len(cap_range_buildings_gdf),
        )

        samples_gdf = cap_range_buildings_gdf.sample(
            frac=min(1, frac),
            random_state=random_state,
        )

        cap_range_dict = kwargs["prob_dict"][cap_range]

        values_dict = cap_range_dict["values"]
        p_dict = cap_range_dict["probabilities"]

        load_factors = rng.choice(
            a=values_dict["load_factor"],
            size=len(samples_gdf),
            p=p_dict["load_factor"],
        )

        # Capacities below 0.4 kW are clipped to a plausible minimum.
        samples_gdf = samples_gdf.assign(
            load_factor=load_factors,
            capacity=(
                samples_gdf.building_area
                * load_factors
                * kwargs["pv_cap_per_sq_m"]
            ).clip(lower=0.4),
        )

        # Rescale so the range target is met exactly.
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()

        samples_gdf = samples_gdf.assign(
            capacity=(samples_gdf.capacity * missing_factor),
            load_factor=(samples_gdf.load_factor * missing_factor),
        )

        assert np.isclose(
            samples_gdf.capacity.sum(),
            pv_cap_range,
            rtol=1e-03,
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"

        results_df = pd.concat(
            [
                results_df,
                samples_gdf,
            ],
        )

    # Rescale once more so the grid-district total is met exactly.
    total_missing_factor = pv_cap / results_df.capacity.sum()

    results_df = results_df.assign(
        capacity=(results_df.capacity * total_missing_factor),
    )

    assert np.isclose(
        results_df.capacity.sum(),
        pv_cap,
        rtol=1e-03,
    ), f"{results_df.capacity.sum()} != {pv_cap}"

    # Fix: use the CRS of the input buildings. All sampled rows stem from
    # buildings_gdf (same CRS), and buildings_gdf is always defined even if
    # cap_share_dict is empty — previously samples_gdf could be unbound here.
    return gpd.GeoDataFrame(
        results_df,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1725
1726
1727
@timer_func
def desaggregate_pv(
    buildings_gdf: gpd.GeoDataFrame,
    cap_df: pd.DataFrame,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    cap_df : pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    # Buildings whose existing plant is still active are kept as-is.
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]

    building_bus_ids = set(buildings_gdf.bus_id)
    cap_bus_ids = set(cap_df.index)

    logger.debug(
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from "
        f"capacity: {len(cap_bus_ids)}"
    )

    if len(building_bus_ids) > len(cap_bus_ids):
        missing = building_bus_ids - cap_bus_ids
    else:
        missing = cap_bus_ids - building_bus_ids

    logger.debug(str(missing))

    # Only grids present in both datasets can be processed.
    bus_ids = np.intersect1d(list(building_bus_ids), list(cap_bus_ids))

    for bus_id in bus_ids:
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]

        pv_installed_gdf = buildings_grid_gdf.loc[
            ~buildings_grid_gdf.end_of_life
        ]

        pv_installed = pv_installed_gdf.capacity.sum()

        pot_buildings_gdf = buildings_grid_gdf.drop(
            index=pv_installed_gdf.index
        )

        if len(pot_buildings_gdf) == 0:
            logger.error(
                f"In grid {bus_id} there are no potential buildings to "
                f"allocate PV capacity to. The grid is skipped. This message "
                f"should only appear doing test runs with few buildings."
            )

            continue

        pv_target = cap_df.at[bus_id, "capacity"]

        logger.debug(f"pv_target: {pv_target}")

        pv_missing = pv_target - pv_installed

        if pv_missing <= 0:
            logger.warning(
                f"In grid {bus_id} there is more PV installed "
                f"({pv_installed: g} kW) in status Quo than allocated within "
                f"the scenario ({pv_target: g} kW). "
                f"No new generators are added."
            )

            continue

        if pot_buildings_gdf.max_cap.sum() < pv_missing:
            logger.error(
                f"In grid {bus_id} there is less PV potential ("
                f"{pot_buildings_gdf.max_cap.sum():g} MW) than allocated PV "
                f"capacity ({pv_missing:g} MW). The average roof utilization "
                f"will be very high."
            )

        gdf = desaggregate_pv_in_mv_grid(
            buildings_gdf=pot_buildings_gdf,
            pv_cap=pv_missing,
            **kwargs,
        )

        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
        logger.debug(
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
        )

        if not np.isclose(
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
        ):
            logger.warning(
                f"The desired capacity and actual capacity in grid {bus_id} "
                f"differ.\n"
                f"Desired cap: {pv_target}\nActual cap: "
                f"{gdf.capacity.sum() + pv_installed}"
            )

        pre_cap = allocated_buildings_gdf.capacity.sum()
        new_cap = gdf.capacity.sum()

        allocated_buildings_gdf = pd.concat(
            [
                allocated_buildings_gdf,
                gdf,
            ]
        )

        total_cap = allocated_buildings_gdf.capacity.sum()

        assert np.isclose(pre_cap + new_cap, total_cap)

    logger.debug("Desaggregated scenario.")
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
    logger.debug(
        f"Generator capacity: " f"{allocated_buildings_gdf.capacity.sum(): g}"
    )

    # Fix: use the CRS of the input buildings. It matches the per-grid
    # results (they are derived from subsets of buildings_gdf) and is always
    # defined — previously `gdf` was unbound when every grid was skipped.
    return gpd.GeoDataFrame(
        allocated_buildings_gdf,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1878
1879
1880
@timer_func
def add_buildings_meta_data(
    buildings_gdf: gpd.GeoDataFrame,
    prob_dict: dict,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Randomly add additional metadata to desaggregated PV plants.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    rng = default_rng(seed=seed)
    buildings_gdf = buildings_gdf.reset_index().rename(
        columns={
            "index": "building_id",
        }
    )

    for (min_cap, max_cap), cap_range_prob_dict in prob_dict.items():
        cap_range_gdf = buildings_gdf.loc[
            (buildings_gdf.capacity >= min_cap)
            & (buildings_gdf.capacity < max_cap)
        ]

        for key, values in cap_range_prob_dict["values"].items():
            # Load factors were already assigned during desaggregation.
            if key == "load_factor":
                continue

            # Rows still missing this property. pandas.Series.isnull is an
            # alias of isna, so one NA check suffices; the literal "None"
            # string stems from raw MaStR entries.
            gdf = cap_range_gdf.loc[
                cap_range_gdf[key].isna() | (cap_range_gdf[key] == "None")
            ]

            # Draw values following the empirical distribution of existing
            # plants in this capacity range.
            key_vals = rng.choice(
                a=values,
                size=len(gdf),
                p=cap_range_prob_dict["probabilities"][key],
            )

            buildings_gdf.loc[gdf.index, key] = key_vals

    return buildings_gdf
1935
1936
1937
def add_commissioning_date(
    buildings_gdf: gpd.GeoDataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    seed: int,
):
    """
    Assign a commissioning date to each new PV generator, drawn uniformly
    from the daily date range between *start* and *end*.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    start : pandas.Timestamp
        Minimum Timestamp to use.
    end : pandas.Timestamp
        Maximum Timestamp to use.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with start-up date
        added.
    """
    random_generator = default_rng(seed=seed)

    # one candidate date per day in the scenario horizon
    candidate_days = pd.date_range(start=start, end=end, freq="1D")

    drawn_dates = random_generator.choice(
        candidate_days, size=len(buildings_gdf)
    )

    return buildings_gdf.assign(commissioning_date=drawn_dates)
1968
1969
1970
@timer_func
def allocate_scenarios(
    mastr_gdf: gpd.GeoDataFrame,
    valid_buildings_gdf: gpd.GeoDataFrame,
    last_scenario_gdf: gpd.GeoDataFrame,
    scenario: str,
):
    """
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.

    Derives empirical distributions (probabilities, capacity shares, load
    factors, building-area ranges) from MaStR data per capacity range and
    uses them to distribute the scenario target capacity per grid onto
    buildings, then enriches the result with metadata and commissioning
    dates.

    Parameters
    ----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    valid_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    last_scenario_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings matched with pv generators
        from temporally preceding scenario.
    scenario : str
        Scenario to desaggregate and allocate.
    Returns
    -------
    tuple
        geopandas.GeoDataFrame
            GeoDataFrame containing OSM buildings matched with pv
            generators.
        pandas.DataFrame
            DataFrame containing pv rooftop capacity per grid id.
    """
    cap_per_bus_id_df = cap_per_bus_id(scenario)

    logger.debug(
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
    )

    # generators from the preceding scenario, processed for end of life
    # (see determine_end_of_life_gens for the exact semantics)
    surviving_gens_gdf = determine_end_of_life_gens(
        last_scenario_gdf,
        SCENARIO_TIMESTAMP[scenario],
        PV_ROOFTOP_LIFETIME,
    )

    potential_buildings_gdf = calculate_max_pv_cap_per_building(
        valid_buildings_gdf,
        surviving_gens_gdf,
        PV_CAP_PER_SQ_M,
        ROOF_FACTOR,
    )

    mastr_with_lf_gdf = calculate_building_load_factor(
        mastr_gdf,
        potential_buildings_gdf,
    )

    # empirical distributions derived from MaStR data per capacity range
    prob_dict = probabilities(
        mastr_with_lf_gdf,
        cap_ranges=CAP_RANGES,
    )

    share_dict = cap_share_per_cap_range(
        mastr_with_lf_gdf,
        cap_ranges=CAP_RANGES,
    )

    load_factor_dict = mean_load_factor_per_cap_range(
        mastr_with_lf_gdf,
        cap_ranges=CAP_RANGES,
    )

    area_range_dict = building_area_range_per_cap_range(
        mastr_with_lf_gdf,
        cap_ranges=CAP_RANGES,
        min_building_size=MIN_BUILDING_SIZE,
        upper_quantile=UPPER_QUANTILE,
        lower_quantile=LOWER_QUANTILE,
    )

    allocated_gdf = desaggregate_pv(
        buildings_gdf=potential_buildings_gdf,
        cap_df=cap_per_bus_id_df,
        prob_dict=prob_dict,
        cap_share_dict=share_dict,
        building_area_range_dict=area_range_dict,
        load_factor_dict=load_factor_dict,
        seed=SEED,
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
    ).assign(scenario=scenario)

    meta_buildings_gdf = frame_to_numeric(
        add_buildings_meta_data(
            allocated_gdf,
            prob_dict,
            SEED,
        )
    )

    dated_buildings_gdf = add_commissioning_date(
        meta_buildings_gdf,
        start=surviving_gens_gdf.commissioning_date.max(),
        end=SCENARIO_TIMESTAMP[scenario],
        seed=SEED,
    )

    return dated_buildings_gdf, cap_per_bus_id_df
2075
2076
2077
class EgonPowerPlantPvRoofBuilding(Base):
    """
    Class definition of table supply.egon_power_plants_pv_roof_building.

    Maps PV rooftop generators to the building they are allocated to,
    one row per generator and scenario.
    """
    __tablename__ = "egon_power_plants_pv_roof_building"
    __table_args__ = {"schema": "supply"}

    # surrogate primary key
    index = Column(Integer, primary_key=True, index=True)
    # scenario the unit belongs to (e.g. "status_quo")
    scenario = Column(String)
    # MV grid district bus; nullable, filled via spatial join for
    # status-quo units (see add_bus_ids_sq)
    bus_id = Column(Integer, nullable=True)
    # id of the (OSM or synthetic) building the unit sits on
    building_id = Column(Integer)
    # MaStR generator id; NULL for synthetic scenario generators
    gens_id = Column(String, nullable=True)
    # installed capacity (units per upstream pipeline, presumably MW —
    # TODO confirm against loader)
    capacity = Column(Float)
    # orientation attributes drawn from MaStR distributions
    # (see add_buildings_meta_data)
    orientation_uniform = Column(Float)
    orientation_primary = Column(String)
    orientation_primary_angle = Column(String)
    # voltage level 1..7; inferred from capacity when missing in MaStR
    # (see infer_voltage_level)
    voltage_level = Column(Integer)
    # ERA5 weather cell the building lies in (see add_weather_cell_id)
    weather_cell_id = Column(Integer)
2096
2097
2098
def add_metadata():
    """
    Attach OEP-style metadata to supply.egon_power_plants_pv_roof_building.

    Builds the metadata dictionary (sources, licenses, contributors,
    resource schema), validates/renders it with the oemetadata dialect
    matching the current metadata version, and submits it as a PostgreSQL
    table comment.
    """
    schema = "supply"
    table = "egon_power_plants_pv_roof_building"
    name = f"{schema}.{table}"
    # Zenodo deposit ids of the MaStR dump and the egon data bundle,
    # used below to build the source URLs
    deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"]
    deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][
        "zenodo"
    ]["deposit_id"]

    # two entries for the same contributor: one for the metadata itself,
    # one for the dataset workflow
    contris = contributors(["kh", "kh"])

    contris[0]["date"] = "2023-03-16"

    contris[0]["object"] = "metadata"
    contris[1]["object"] = "dataset"

    contris[0]["comment"] = "Add metadata to dataset."
    contris[1]["comment"] = "Add workflow to generate dataset."

    # metadata document following the oemetadata specification
    meta = {
        "name": name,
        "title": "eGon power plants rooftop solar",
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": (
            "eGon power plants rooftop solar systems allocated to buildings"
        ),
        "language": "en-US",
        "keywords": ["photovoltaik", "solar", "pv", "mastr", "status quo"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": "none",
            "extent": "Germany",
            "resolution": "building",
        },
        "temporal": {
            "referenceDate": (
                config.datasets()["mastr_new"]["egon2021_date_max"].split(" ")[
                    0
                ]
            ),
            "timeseries": {},
        },
        "sources": [
            {
                "title": "Data bundle for egon-data",
                "description": (
                    "Data bundle for egon-data: A transparent and "
                    "reproducible data processing pipeline for energy "
                    "system modeling"
                ),
                "path": (
                    "https://zenodo.org/record/"
                    f"{deposit_id_data_bundle}#.Y_dWM4CZMVM"
                ),
                "licenses": [license_dedl(attribution="© Cußmann, Ilka")],
            },
            {
                "title": ("open-MaStR power unit registry for eGo^n project"),
                "description": (
                    "Data from Marktstammdatenregister (MaStR) data using "
                    "the data dump from 2022-11-17 for eGon-data."
                ),
                "path": (
                    f"https://zenodo.org/record/{deposit_id_mastr}"
                ),
                "licenses": [license_dedl(attribution="© Amme, Jonathan")],
            },
            sources()["openstreetmap"],
            sources()["era5"],
            sources()["vg250"],
            sources()["egon-data"],
        ],
        "licenses": [license_odbl("© eGon development team")],
        "contributors": contris,
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": name,
                "path": "None",
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    # field list is generated from the live DB table so it
                    # always matches the actual columns
                    "fields": generate_resource_fields_from_db_table(
                        schema,
                        table,
                    ),
                    "primaryKey": "index",
                },
                "dialect": {"delimiter": "", "decimalSeparator": ""},
            }
        ],
        "review": {"path": "", "badge": ""},
        "metaMetadata": meta_metadata(),
        "_comment": {
            "metadata": (
                "Metadata documentation and explanation (https://github."
                "com/OpenEnergyPlatform/oemetadata/blob/master/metadata/"
                "v141/metadata_key_description.md)"
            ),
            "dates": (
                "Dates and time must follow the ISO8601 including time "
                "zone (YYYY-MM-DD or YYYY-MM-DDThh:mm:ss±hh)"
            ),
            "units": "Use a space between numbers and units (100 m)",
            "languages": (
                "Languages must follow the IETF (BCP47) format (en-GB, "
                "en-US, de-DE)"
            ),
            "licenses": (
                "License name must follow the SPDX License List "
                "(https://spdx.org/licenses/)"
            ),
            "review": (
                "Following the OEP Data Review (https://github.com/"
                "OpenEnergyPlatform/data-preprocessing/wiki)"
            ),
            "none": "If not applicable use (none)",
        },
    }

    # parse + re-render through the dialect matching the metadata version
    # to validate the document before submitting it
    dialect = get_dialect(meta_metadata()["metadataVersion"])()

    meta = dialect.compile_and_render(dialect.parse(json.dumps(meta)))

    db.submit_comment(
        f"'{json.dumps(meta)}'",
        schema,
        table,
    )
2228
2229
2230
def create_scenario_table(buildings_gdf):
    """Create mapping table pv_unit <-> building for scenario.

    Drops and recreates ``supply.egon_power_plants_pv_roof_building``,
    writes the export columns of ``buildings_gdf`` into it and attaches
    the dataset metadata as a table comment.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        Buildings with allocated PV rooftop generators, one row per
        generator and scenario.
    """
    # recreate from scratch so reruns do not accumulate stale rows
    EgonPowerPlantPvRoofBuilding.__table__.drop(bind=engine, checkfirst=True)
    EgonPowerPlantPvRoofBuilding.__table__.create(bind=engine, checkfirst=True)

    buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql(
        name=EgonPowerPlantPvRoofBuilding.__table__.name,
        schema=EgonPowerPlantPvRoofBuilding.__table__.schema,
        # reuse the module-level engine (also used for drop/create above)
        # instead of constructing a second engine via db.engine()
        con=engine,
        if_exists="append",
        index=False,
    )

    add_metadata()
2244
2245
2246
def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Attach an ERA5 weather cell id to every generator.

    Maps building -> zensus population cell -> weather cell via the two
    boundary mapping tables and raises if any building ends up without a
    weather cell.
    """
    building_map_sql = """
    SELECT building_id, zensus_population_id
    FROM boundaries.egon_map_zensus_mvgd_buildings
    """

    weather_map_sql = """
    SELECT zensus_population_id, w_id as weather_cell_id
    FROM boundaries.egon_map_zensus_weather_cell
    """

    # deduplicate the mapping tables so the left joins stay one-to-one
    building_map_df = db.select_dataframe(building_map_sql).drop_duplicates(
        subset="building_id"
    )
    weather_map_df = db.select_dataframe(weather_map_sql).drop_duplicates(
        subset="zensus_population_id"
    )

    buildings_gdf = buildings_gdf.merge(
        right=building_map_df,
        how="left",
        on="building_id",
    ).merge(
        right=weather_map_df,
        how="left",
        on="zensus_population_id",
    )

    no_cell_mask = buildings_gdf.weather_cell_id.isna()

    if no_cell_mask.any():
        missing = buildings_gdf.loc[no_cell_mask, "building_id"].tolist()

        raise ValueError(
            f"Following buildings don't have a weather cell id: {missing}"
        )

    return buildings_gdf
2281
2282
2283
def add_bus_ids_sq(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """Add bus ids for status_quo units.

    Status-quo generators lack a bus id, so it is derived from a spatial
    join against the MV grid districts; scenario units keep theirs.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with bus_id per
        generator.
    """
    districts_gdf = grid_districts(EPSG)

    is_status_quo = buildings_gdf.scenario == "status_quo"

    # spatial join: index_right carries the matched grid district id
    joined_gdf = buildings_gdf.loc[is_status_quo].sjoin(
        districts_gdf, how="left"
    )
    buildings_gdf.loc[is_status_quo, "bus_id"] = joined_gdf.index_right

    return buildings_gdf
2309
2310
2311 View Code Duplication
def infer_voltage_level(
    units_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Infer nan values in voltage level derived from generator capacity of
    the power plants.

    Parameters
    -----------
    units_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing units with voltage levels from MaStR.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing units all having assigned a voltage level.
    """

    def voltage_levels(p: float) -> int:
        # capacity thresholds mapping to voltage levels 7 (lowest) .. 1;
        # level 2 is deliberately never assigned by these bands
        if p <= 0.1:
            return 7
        elif p <= 0.2:
            return 6
        elif p <= 5.5:
            return 5
        elif p <= 20:
            return 4
        elif p <= 120:
            return 3
        return 1

    # flag which rows had their voltage level inferred rather than taken
    # from MaStR, then fill only the missing ones
    units_gdf["voltage_level_inferred"] = False
    mask = units_gdf.voltage_level.isna()
    units_gdf.loc[mask, "voltage_level_inferred"] = True
    units_gdf.loc[mask, "voltage_level"] = units_gdf.loc[mask].capacity.apply(
        voltage_levels
    )

    return units_gdf
2349
2350
2351
def pv_rooftop_to_buildings():
    """Main script, executed as task"""

    raw_mastr_gdf = load_mastr_data()
    raw_buildings_gdf = load_building_data()

    status_quo_mastr_gdf, status_quo_buildings_gdf = allocate_to_buildings(
        raw_mastr_gdf, raw_buildings_gdf
    )

    # status quo generators form the starting point of the result
    result_gdf = (
        status_quo_mastr_gdf.assign(scenario="status_quo")
        .reset_index()
        .rename(columns={"geometry": "geom"})
    )

    # rolling input for the next scenario's end-of-life handling
    previous_scenario_gdf = result_gdf.copy()

    cap_per_bus_id_df = pd.DataFrame()

    for scenario in SCENARIOS:
        logger.debug(f"Desaggregating scenario {scenario}.")

        (
            previous_scenario_gdf,
            scenario_cap_df,
        ) = allocate_scenarios(  # noqa: F841
            status_quo_mastr_gdf,
            status_quo_buildings_gdf,
            previous_scenario_gdf,
            scenario,
        )

        result_gdf = gpd.GeoDataFrame(
            pd.concat(
                [result_gdf, previous_scenario_gdf], ignore_index=True
            ),
            crs=previous_scenario_gdf.crs,
            geometry="geom",
        )

        cap_per_bus_id_df = pd.concat([cap_per_bus_id_df, scenario_cap_df])

    # add weather cell
    result_gdf = add_weather_cell_id(result_gdf)

    # add bus IDs for status quo scenario
    result_gdf = add_bus_ids_sq(result_gdf)

    # export scenario
    create_scenario_table(infer_voltage_level(result_gdf))
2404