allocate_pv()   C
last analyzed

Complexity

Conditions 7

Size

Total Lines 121
Code Lines 58

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 58
dl 0
loc 121
rs 6.9745
c 0
b 0
f 0
cc 7
nop 3

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""
2
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate
3
new PV rooftop generators for scenarios eGon2035 and eGon100RE.
4
5
See documentation section :ref:`pv-rooftop-ref` for more information.
6
7
"""
8
from __future__ import annotations
9
10
from collections import Counter
11
from functools import wraps
12
from time import perf_counter
13
import datetime
14
import json
15
16
from geoalchemy2 import Geometry
17
from loguru import logger
18
from numpy.random import RandomState, default_rng
19
from omi.dialects import get_dialect
20
from pyproj.crs.crs import CRS
21
from sqlalchemy import BigInteger, Column, Float, Integer, String
22
from sqlalchemy.dialects.postgresql import HSTORE
23
from sqlalchemy.ext.declarative import declarative_base
24
import geopandas as gpd
25
import numpy as np
26
import pandas as pd
27
28
from egon.data import config, db
29
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
30
    OsmBuildingsSynthetic,
31
)
32
from egon.data.datasets.power_plants.mastr_db_classes import EgonPowerPlantsPv
33
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
34
from egon.data.datasets.scenario_parameters import get_scenario_year
35
from egon.data.datasets.zensus_vg250 import Vg250Gem
36
from egon.data.metadata import (
37
    context,
38
    contributors,
39
    generate_resource_fields_from_db_table,
40
    license_dedl,
41
    license_odbl,
42
    meta_metadata,
43
    meta_metadata,
44
    sources,
45
)
46
47
engine = db.engine()
48
Base = declarative_base()
49
SEED = int(config.settings()["egon-data"]["--random-seed"])
50
51
# TODO: move to yml
52
MASTR_INDEX_COL = "gens_id"
53
54
EPSG = 4326
55
SRID = 3035
56
57
# data cleaning
58
MAX_REALISTIC_PV_CAP = 23500 / 10**3
59
MIN_REALISTIC_PV_CAP = 0.1 / 10**3
60
61
# show additional logging information
62
VERBOSE = False
63
64
# Number of quantiles
65
Q = 5
66
67
# Scenario Data
68
SCENARIOS = config.settings()["egon-data"]["--scenarios"]
69
SCENARIO_TIMESTAMP = {
70
    "status2019": pd.Timestamp("2020-01-01", tz="UTC"),
71
    "status2023": pd.Timestamp("2024-01-01", tz="UTC"),
72
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
73
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
74
}
75
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")
76
77
# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
78
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
79
MODUL_CAP = 0.4 / 10**3  # MWp
80
MODUL_SIZE = 1.096 * 1.754  # m²
81
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE
82
83
# Estimation of usable roof area
84
# Factor for the conversion of building area to roof area
85
# estimation mean roof pitch: 35°
86
# estimation usable roof share: 80%
87
# estimation that only the south side of the building is used for pv
88
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
89
# AREA_FACTOR = 1.221
90
# USABLE_ROOF_SHARE = 0.8
91
# SOUTH_SHARE = 0.5
92
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
93
ROOF_FACTOR = 0.5
94
95
CAP_RANGES = [
96
    (0, 30 / 10**3),
97
    (30 / 10**3, 100 / 10**3),
98
    (100 / 10**3, float("inf")),
99
]
100
101
MIN_BUILDING_SIZE = 10.0
102
UPPER_QUANTILE = 0.95
103
LOWER_QUANTILE = 0.05
104
105
COLS_TO_EXPORT = [
106
    "scenario",
107
    "bus_id",
108
    "building_id",
109
    "gens_id",
110
    "capacity",
111
    "orientation_uniform",
112
    "orientation_primary",
113
    "orientation_primary_angle",
114
    "voltage_level",
115
    "weather_cell_id",
116
]
117
118
# TODO
119
INCLUDE_SYNTHETIC_BUILDINGS = True
120
ONLY_BUILDINGS_WITH_DEMAND = True
121
TEST_RUN = False
122
123
124
def timer_func(func):
125
    @wraps(func)
126
    def timeit_wrapper(*args, **kwargs):
127
        start_time = perf_counter()
128
        result = func(*args, **kwargs)
129
        end_time = perf_counter()
130
        total_time = end_time - start_time
131
        logger.debug(
132
            f"Function {func.__name__} took {total_time:.4f} seconds."
133
        )
134
        return result
135
136
    return timeit_wrapper
137
138
139
@timer_func
140
def mastr_data(
141
    index_col: str | int | list[str] | list[int],
142
) -> gpd.GeoDataFrame:
143
    """
144
    Read MaStR data from database.
145
146
    Parameters
147
    -----------
148
    index_col : str, int or list of str or int
149
        Column(s) to use as the row labels of the DataFrame.
150
    Returns
151
    -------
152
    pandas.DataFrame
153
        DataFrame containing MaStR data.
154
    """
155
    with db.session_scope() as session:
156
        query = session.query(EgonPowerPlantsPv).filter(
157
            EgonPowerPlantsPv.status == "InBetrieb",
158
            EgonPowerPlantsPv.site_type
159
            == ("Bauliche Anlagen (Hausdach, Gebäude und Fassade)"),
160
        )
161
162
    gdf = gpd.read_postgis(
163
        query.statement, query.session.bind, index_col=index_col
164
    ).drop(columns="id")
165
166
    logger.debug("MaStR data loaded.")
167
168
    return gdf
169
170
171
@timer_func
172
def clean_mastr_data(
173
    mastr_gdf: gpd.GeoDataFrame,
174
    max_realistic_pv_cap: int | float,
175
    min_realistic_pv_cap: int | float,
176
    seed: int,
177
) -> gpd.GeoDataFrame:
178
    """
179
    Clean the MaStR data from implausible data.
180
181
    * Drop MaStR ID duplicates.
182
    * Drop generators with implausible capacities.
183
184
    Parameters
185
    -----------
186
    mastr_gdf : pandas.DataFrame
187
        DataFrame containing MaStR data.
188
    max_realistic_pv_cap : int or float
189
        Maximum capacity, which is considered to be realistic.
190
    min_realistic_pv_cap : int or float
191
        Minimum capacity, which is considered to be realistic.
192
    seed : int
193
        Seed to use for random operations with NumPy and pandas.
194
    Returns
195
    -------
196
    pandas.DataFrame
197
        DataFrame containing cleaned MaStR data.
198
    """
199
    init_len = len(mastr_gdf)
200
201
    # drop duplicates
202
    mastr_gdf = mastr_gdf.loc[~mastr_gdf.index.duplicated()]
203
204
    # drop generators without any capacity info
205
    # and capacity of zero
206
    # and if the capacity is > 23.5 MW, because
207
    # Germanies largest rooftop PV is 23 MW
208
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
209
    mastr_gdf = mastr_gdf.loc[
210
        ~mastr_gdf.capacity.isna()
211
        & (mastr_gdf.capacity <= max_realistic_pv_cap)
212
        & (mastr_gdf.capacity > min_realistic_pv_cap)
213
    ]
214
215
    # get consistent start-up date
216
    # randomly and weighted fill missing start-up dates
217
    pool = mastr_gdf.loc[
218
        ~mastr_gdf.commissioning_date.isna()
219
    ].commissioning_date.to_numpy()
220
221
    size = len(mastr_gdf) - len(pool)
222
223
    if size > 0:
224
        rng = default_rng(seed=seed)
225
226
        choice = rng.choice(
227
            pool,
228
            size=size,
229
            replace=False,
230
        )
231
232
        mastr_gdf.loc[mastr_gdf.commissioning_date.isna()] = mastr_gdf.loc[
233
            mastr_gdf.commissioning_date.isna()
234
        ].assign(commissioning_date=choice)
235
236
        logger.info(
237
            f"Randomly and weigthed added start-up date to {size} generators."
238
        )
239
240
    mastr_gdf = mastr_gdf.assign(
241
        commissioning_date=pd.to_datetime(
242
            mastr_gdf.commissioning_date, utc=True
243
        )
244
    )
245
246
    end_len = len(mastr_gdf)
247
    logger.debug(
248
        f"Dropped {init_len - end_len} "
249
        f"({((init_len - end_len) / init_len) * 100:g}%)"
250
        f" of {init_len} rows from MaStR DataFrame."
251
    )
252
253
    return mastr_gdf
254
255
256
@timer_func
257
def municipality_data() -> gpd.GeoDataFrame:
258
    """
259
    Get municipality data from eGo^n Database.
260
    Returns
261
    -------
262
    gepandas.GeoDataFrame
263
        GeoDataFrame with municipality data.
264
    """
265
    with db.session_scope() as session:
266
        query = session.query(Vg250Gem.ags, Vg250Gem.geometry.label("geom"))
267
268
    return gpd.read_postgis(
269
        query.statement, query.session.bind, index_col="ags"
270
    )
271
272
273
@timer_func
274
def add_ags_to_gens(
275
    mastr_gdf: gpd.GeoDataFrame,
276
    municipalities_gdf: gpd.GeoDataFrame,
277
) -> gpd.GeoDataFrame:
278
    """
279
    Add information about AGS ID to generators.
280
281
    Parameters
282
    -----------
283
    mastr_gdf : geopandas.GeoDataFrame
284
        GeoDataFrame with valid and cleaned MaStR data.
285
    municipalities_gdf : geopandas.GeoDataFrame
286
        GeoDataFrame with municipality data.
287
    Returns
288
    -------
289
    gepandas.GeoDataFrame
290
        GeoDataFrame with valid and cleaned MaStR data
291
        with AGS ID added.
292
    """
293
    return mastr_gdf.sjoin(
294
        municipalities_gdf,
295
        how="left",
296
        predicate="intersects",
297
    ).rename(columns={"index_right": "ags"})
298
299
300
def drop_gens_outside_muns(
301
    mastr_gdf: gpd.GeoDataFrame,
302
) -> gpd.GeoDataFrame:
303
    """
304
    Drop all generators outside of municipalities.
305
306
    Parameters
307
    -----------
308
    mastr_gdf : geopandas.GeoDataFrame
309
        GeoDataFrame with valid and cleaned MaStR data.
310
    Returns
311
    -------
312
    gepandas.GeoDataFrame
313
        GeoDataFrame with valid and cleaned MaStR data
314
        with generatos without an AGS ID dropped.
315
    """
316
    gdf = mastr_gdf.loc[~mastr_gdf.ags.isna()]
317
318
    logger.debug(
319
        f"{len(mastr_gdf) - len(gdf)} ("
320
        f"{(len(mastr_gdf) - len(gdf)) / len(mastr_gdf) * 100:g}%)"
321
        f" of {len(mastr_gdf)} values are outside of the municipalities"
322
        " and are therefore dropped."
323
    )
324
325
    return gdf
326
327
328
def load_mastr_data():
329
    """Read PV rooftop data from MaStR CSV
330
    Note: the source will be replaced as soon as the MaStR data is available
331
    in DB.
332
333
    Returns
334
    -------
335
    geopandas.GeoDataFrame
336
        GeoDataFrame containing MaStR data with geocoded locations.
337
    """
338
    mastr_gdf = mastr_data(
339
        MASTR_INDEX_COL,
340
    )
341
342
    clean_mastr_gdf = clean_mastr_data(
343
        mastr_gdf,
344
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
345
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
346
        seed=SEED,
347
    )
348
349
    municipalities_gdf = municipality_data()
350
351
    clean_mastr_gdf = add_ags_to_gens(clean_mastr_gdf, municipalities_gdf)
352
353
    return drop_gens_outside_muns(clean_mastr_gdf)
354
355
356
class OsmBuildingsFiltered(Base):
357
    """
358
    Class definition of table openstreetmap.osm_buildings_filtered.
359
360
    """
361
    __tablename__ = "osm_buildings_filtered"
362
    __table_args__ = {"schema": "openstreetmap"}
363
364
    osm_id = Column(BigInteger)
365
    amenity = Column(String)
366
    building = Column(String)
367
    name = Column(String)
368
    geom = Column(Geometry(srid=SRID), index=True)
369
    area = Column(Float)
370
    geom_point = Column(Geometry(srid=SRID), index=True)
371
    tags = Column(HSTORE)
372
    id = Column(BigInteger, primary_key=True, index=True)
373
374
375
@timer_func
376
def osm_buildings(
377
    to_crs: CRS,
378
) -> gpd.GeoDataFrame:
379
    """
380
    Read OSM buildings data from eGo^n Database.
381
382
    Parameters
383
    -----------
384
    to_crs : pyproj.crs.crs.CRS
385
        CRS to transform geometries to.
386
    Returns
387
    -------
388
    geopandas.GeoDataFrame
389
        GeoDataFrame containing OSM buildings data.
390
    """
391
    with db.session_scope() as session:
392
        query = session.query(
393
            OsmBuildingsFiltered.id,
394
            OsmBuildingsFiltered.area,
395
            OsmBuildingsFiltered.geom_point.label("geom"),
396
        )
397
398
    return gpd.read_postgis(
399
        query.statement, query.session.bind, index_col="id"
400
    ).to_crs(to_crs)
401
402
403
@timer_func
404
def synthetic_buildings(
405
    to_crs: CRS,
406
) -> gpd.GeoDataFrame:
407
    """
408
    Read synthetic buildings data from eGo^n Database.
409
410
    Parameters
411
    -----------
412
    to_crs : pyproj.crs.crs.CRS
413
        CRS to transform geometries to.
414
    Returns
415
    -------
416
    geopandas.GeoDataFrame
417
        GeoDataFrame containing OSM buildings data.
418
    """
419
    with db.session_scope() as session:
420
        query = session.query(
421
            OsmBuildingsSynthetic.id,
422
            OsmBuildingsSynthetic.area,
423
            OsmBuildingsSynthetic.geom_point.label("geom"),
424
        )
425
426
    return gpd.read_postgis(
427
        query.statement, query.session.bind, index_col="id"
428
    ).to_crs(to_crs)
429
430
431
@timer_func
432
def add_ags_to_buildings(
433
    buildings_gdf: gpd.GeoDataFrame,
434
    municipalities_gdf: gpd.GeoDataFrame,
435
) -> gpd.GeoDataFrame:
436
    """
437
    Add information about AGS ID to buildings.
438
439
    Parameters
440
    -----------
441
    buildings_gdf : geopandas.GeoDataFrame
442
        GeoDataFrame containing OSM buildings data.
443
    municipalities_gdf : geopandas.GeoDataFrame
444
        GeoDataFrame with municipality data.
445
    Returns
446
    -------
447
    gepandas.GeoDataFrame
448
        GeoDataFrame containing OSM buildings data
449
        with AGS ID added.
450
    """
451
    return buildings_gdf.sjoin(
452
        municipalities_gdf,
453
        how="left",
454
        predicate="intersects",
455
    ).rename(columns={"index_right": "ags"})
456
457
458
def drop_buildings_outside_muns(
459
    buildings_gdf: gpd.GeoDataFrame,
460
) -> gpd.GeoDataFrame:
461
    """
462
    Drop all buildings outside of municipalities.
463
464
    Parameters
465
    -----------
466
    buildings_gdf : geopandas.GeoDataFrame
467
        GeoDataFrame containing OSM buildings data.
468
    Returns
469
    -------
470
    gepandas.GeoDataFrame
471
        GeoDataFrame containing OSM buildings data
472
        with buildings without an AGS ID dropped.
473
    """
474
    gdf = buildings_gdf.loc[~buildings_gdf.ags.isna()]
475
476
    logger.debug(
477
        f"{len(buildings_gdf) - len(gdf)} "
478
        f"({(len(buildings_gdf) - len(gdf)) / len(buildings_gdf) * 100:g}%) "
479
        f"of {len(buildings_gdf)} values are outside of the municipalities "
480
        "and are therefore dropped."
481
    )
482
483
    return gdf
484
485
486
def egon_building_peak_loads():
487
488
    # use active scenario wich is closest to today
489
    scenario = sorted(SCENARIOS, key=get_scenario_year)[0]
490
491
    sql = f"""
492
    SELECT building_id
493
    FROM demand.egon_building_electricity_peak_loads
494
    WHERE scenario = '{scenario}'
495
    """
496
497
    return (
498
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
499
    )
500
501
502
@timer_func
503
def load_building_data():
504
    """
505
    Read buildings from DB
506
    Tables:
507
508
    * `openstreetmap.osm_buildings_filtered` (from OSM)
509
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)
510
511
    Use column `id` for both as it is unique hence you concat both datasets.
512
    If INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be
513
    loaded.
514
515
    Returns
516
    -------
517
    gepandas.GeoDataFrame
518
        GeoDataFrame containing OSM buildings data with buildings without an
519
        AGS ID dropped.
520
    """
521
522
    municipalities_gdf = municipality_data()
523
524
    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)
525
526
    if INCLUDE_SYNTHETIC_BUILDINGS:
527
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)
528
529
        buildings_gdf = gpd.GeoDataFrame(
530
            pd.concat(
531
                [
532
                    osm_buildings_gdf,
533
                    synthetic_buildings_gdf,
534
                ]
535
            ),
536
            geometry="geom",
537
            crs=osm_buildings_gdf.crs,
538
        ).rename(columns={"area": "building_area"})
539
540
        buildings_gdf.index = buildings_gdf.index.astype(int)
541
542
    else:
543
        buildings_gdf = osm_buildings_gdf.rename(
544
            columns={"area": "building_area"}
545
        )
546
547
    if ONLY_BUILDINGS_WITH_DEMAND:
548
        building_ids = egon_building_peak_loads()
549
550
        init_len = len(building_ids)
551
552
        building_ids = np.intersect1d(
553
            list(map(int, building_ids)),
554
            list(map(int, buildings_gdf.index.to_numpy())),
555
        )
556
557
        end_len = len(building_ids)
558
559
        logger.debug(
560
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) "
561
            f"of buildings have peak load."
562
        )
563
564
        buildings_gdf = buildings_gdf.loc[building_ids]
565
566
    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)
567
568
    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)
569
570
    grid_districts_gdf = grid_districts(EPSG)
571
572
    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)
573
574
    grid_federal_state_gdf = overlay_grid_districts_with_counties(
575
        grid_districts_gdf,
576
        federal_state_gdf,
577
    )
578
579
    buildings_overlay_gdf = add_overlay_id_to_buildings(
580
        buildings_ags_gdf,
581
        grid_federal_state_gdf,
582
    )
583
584
    logger.debug("Loaded buildings.")
585
586
    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)
587
588
    # overwrite bus_id with data from new table
589
    sql = (
590
        "SELECT building_id, bus_id FROM "
591
        "boundaries.egon_map_zensus_mvgd_buildings"
592
    )
593
    map_building_bus_df = db.select_dataframe(sql)
594
595
    building_ids = np.intersect1d(
596
        list(map(int, map_building_bus_df.building_id.unique())),
597
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
598
    )
599
600
    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]
601
602
    gdf = (
603
        buildings_within_gdf.reset_index()
604
        .drop(columns=["bus_id"])
605
        .merge(
606
            how="left",
607
            right=map_building_bus_df,
608
            left_on="id",
609
            right_on="building_id",
610
        )
611
        .drop(columns=["building_id"])
612
        .set_index("id")
613
        .sort_index()
614
    )
615
616
    return gdf[~gdf.index.duplicated(keep="first")]
617
618
619
@timer_func
620
def sort_and_qcut_df(
621
    df: pd.DataFrame | gpd.GeoDataFrame,
622
    col: str,
623
    q: int,
624
) -> pd.DataFrame | gpd.GeoDataFrame:
625
    """
626
    Determine the quantile of a given attribute in a (Geo)DataFrame.
627
    Sort the (Geo)DataFrame in ascending order for the given attribute.
628
629
    Parameters
630
    -----------
631
    df : pandas.DataFrame or geopandas.GeoDataFrame
632
        (Geo)DataFrame to sort and qcut.
633
    col : str
634
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
635
    q : int
636
        Number of quantiles.
637
    Returns
638
    -------
639
    pandas.DataFrame or gepandas.GeoDataFrame
640
        Sorted and qcut (Geo)DataFrame.
641
    """
642
    df = df.sort_values(col, ascending=True)
643
644
    return df.assign(
645
        quant=pd.qcut(
646
            df[col],
647
            q=q,
648
            labels=range(q),
649
        )
650
    )
651
652
653
@timer_func
654
def allocate_pv(
655
    q_mastr_gdf: gpd.GeoDataFrame,
656
    q_buildings_gdf: gpd.GeoDataFrame,
657
    seed: int,
658
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
659
    """
660
    Allocate the MaStR pv generators to the OSM buildings.
661
    This will determine a building for each pv generator if there are more
662
    buildings than generators within a given AGS. Primarily generators are
663
    distributed with the same qunatile as the buildings. Multiple assignment
664
    is excluded.
665
666
    Parameters
667
    -----------
668
    q_mastr_gdf : geopandas.GeoDataFrame
669
        GeoDataFrame containing geocoded and qcut MaStR data.
670
    q_buildings_gdf : geopandas.GeoDataFrame
671
        GeoDataFrame containing qcut OSM buildings data.
672
    seed : int
673
        Seed to use for random operations with NumPy and pandas.
674
    Returns
675
    -------
676
    tuple with two geopandas.GeoDataFrame s
677
        GeoDataFrame containing MaStR data allocated to building IDs.
678
        GeoDataFrame containing building data allocated to MaStR IDs.
679
    """
680
    rng = default_rng(seed=seed)
681
682
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan).sort_values(
683
        by=["ags", "quant"]
684
    )
685
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan).sort_values(
686
        by=["ags", "quant"]
687
    )
688
689
    ags_list = q_buildings_gdf.ags.unique()
690
691
    if TEST_RUN:
692
        ags_list = ags_list[:250]
693
694
    num_ags = len(ags_list)
695
696
    t0 = perf_counter()
697
698
    for count, ags in enumerate(ags_list):
699
        buildings = q_buildings_gdf.loc[q_buildings_gdf.ags == ags]
700
        gens = q_mastr_gdf.loc[q_mastr_gdf.ags == ags]
701
702
        len_build = len(buildings)
703
        len_gens = len(gens)
704
705
        if len_build < len_gens:
706
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
707
            logger.error(
708
                f"There are {len_gens} generators and only {len_build}"
709
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
710
                "generators were truncated to match the amount of buildings."
711
            )
712
713
            assert len_build == len(gens)
714
715
        for quant in gens.quant.unique():
716
            q_buildings = buildings.loc[buildings.quant == quant]
717
            q_gens = gens.loc[gens.quant == quant]
718
719
            len_build = len(q_buildings)
720
            len_gens = len(q_gens)
721
722
            if len_build < len_gens:
723
                delta = len_gens - len_build
724
725
                logger.warning(
726
                    f"There are {len_gens} generators and only {len_build} "
727
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
728
                    f"buildings from AGS {ags} will be added randomly."
729
                )
730
731
                add_buildings = pd.Index(
732
                    rng.choice(
733
                        list(set(buildings.index) - set(q_buildings.index)),
734
                        size=delta,
735
                        replace=False,
736
                    )
737
                )
738
739
                chosen_buildings = q_buildings.index.union(add_buildings)
740
741
            else:
742
                chosen_buildings = rng.choice(
743
                    q_buildings.index,
744
                    size=len_gens,
745
                    replace=False,
746
                )
747
748
            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index
749
            buildings = buildings.drop(chosen_buildings)
750
751
        if count % 500 == 0:
752
            logger.debug(
753
                f"Allocation of {count / num_ags * 100:g} % of AGS done. "
754
                f"It took {perf_counter() - t0:g} seconds."
755
            )
756
757
            t0 = perf_counter()
758
759
    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]
760
761
    assert len(assigned_buildings) == len(assigned_buildings.gens_id.unique())
762
763
    q_mastr_gdf.loc[assigned_buildings.gens_id, "building_id"] = (
764
        assigned_buildings.index
765
    )
766
767
    assigned_gens = q_mastr_gdf.loc[~q_mastr_gdf.building_id.isna()]
768
769
    assert len(assigned_buildings) == len(assigned_gens)
770
771
    logger.debug("Allocated status quo generators to buildings.")
772
773
    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
774
775
776
def frame_to_numeric(
777
    df: pd.DataFrame | gpd.GeoDataFrame,
778
) -> pd.DataFrame | gpd.GeoDataFrame:
779
    """
780
    Try to convert all columns of a DataFrame to numeric ignoring errors.
781
782
    Parameters
783
    ----------
784
    df : pandas.DataFrame or geopandas.GeoDataFrame
785
    Returns
786
    -------
787
    pandas.DataFrame or geopandas.GeoDataFrame
788
    """
789
    if str(df.index.dtype) == "object":
790
        df.index = pd.to_numeric(df.index, errors="ignore")
791
792
    for col in df.columns:
793
        if str(df[col].dtype) == "object":
794
            df[col] = pd.to_numeric(df[col], errors="ignore")
795
796
    return df
797
798
799
def validate_output(
800
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
801
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
802
) -> None:
803
    """
804
    Validate output.
805
806
    * Validate that there are exactly as many buildings with a pv system as
807
      there are pv systems with a building
808
    * Validate that the building IDs with a pv system are the same building
809
      IDs as assigned to the pv systems
810
    * Validate that the pv system IDs with a building are the same pv system
811
      IDs as assigned to the buildings
812
813
    Parameters
814
    -----------
815
    desagg_mastr_gdf : geopandas.GeoDataFrame
816
        GeoDataFrame containing MaStR data allocated to building IDs.
817
    desagg_buildings_gdf : geopandas.GeoDataFrame
818
        GeoDataFrame containing building data allocated to MaStR IDs.
819
    """
820
    assert len(
821
        desagg_mastr_gdf.loc[~desagg_mastr_gdf.building_id.isna()]
822
    ) == len(desagg_buildings_gdf.loc[~desagg_buildings_gdf.gens_id.isna()])
823
    assert (
824
        np.sort(
825
            desagg_mastr_gdf.loc[
826
                ~desagg_mastr_gdf.building_id.isna()
827
            ].building_id.unique()
828
        )
829
        == np.sort(
830
            desagg_buildings_gdf.loc[
831
                ~desagg_buildings_gdf.gens_id.isna()
832
            ].index.unique()
833
        )
834
    ).all()
835
    assert (
836
        np.sort(
837
            desagg_mastr_gdf.loc[
838
                ~desagg_mastr_gdf.building_id.isna()
839
            ].index.unique()
840
        )
841
        == np.sort(
842
            desagg_buildings_gdf.loc[
843
                ~desagg_buildings_gdf.gens_id.isna()
844
            ].gens_id.unique()
845
        )
846
    ).all()
847
848
    logger.debug("Validated output.")
849
850
851
def drop_unallocated_gens(
852
    gdf: gpd.GeoDataFrame,
853
) -> gpd.GeoDataFrame:
854
    """
855
    Drop generators which did not get allocated.
856
857
    Parameters
858
    -----------
859
    gdf : geopandas.GeoDataFrame
860
        GeoDataFrame containing MaStR data allocated to building IDs.
861
    Returns
862
    -------
863
    geopandas.GeoDataFrame
864
        GeoDataFrame containing MaStR data with generators dropped which did
865
        not get allocated.
866
    """
867
    init_len = len(gdf)
868
    gdf = gdf.loc[~gdf.building_id.isna()]
869
    end_len = len(gdf)
870
871
    logger.debug(
872
        f"Dropped {init_len - end_len} "
873
        f"({((init_len - end_len) / init_len) * 100:g}%)"
874
        f" of {init_len} unallocated rows from MaStR DataFrame."
875
    )
876
877
    return gdf
878
879
880
@timer_func
881
def allocate_to_buildings(
882
    mastr_gdf: gpd.GeoDataFrame,
883
    buildings_gdf: gpd.GeoDataFrame,
884
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
885
    """
886
    Allocate status quo pv rooftop generators to buildings.
887
888
    Parameters
889
    -----------
890
    mastr_gdf : geopandas.GeoDataFrame
891
        GeoDataFrame containing MaStR data with geocoded locations.
892
    buildings_gdf : geopandas.GeoDataFrame
893
        GeoDataFrame containing OSM buildings data with buildings without an
894
        AGS ID dropped.
895
    Returns
896
    -------
897
    tuple with two geopandas.GeoDataFrame s
898
        GeoDataFrame containing MaStR data allocated to building IDs.
899
        GeoDataFrame containing building data allocated to MaStR IDs.
900
    """
901
    logger.debug("Starting allocation of status quo.")
902
903
    q_mastr_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
904
    q_buildings_gdf = sort_and_qcut_df(buildings_gdf, col="building_area", q=Q)
905
906
    desagg_mastr_gdf, desagg_buildings_gdf = allocate_pv(
907
        q_mastr_gdf, q_buildings_gdf, SEED
908
    )
909
910
    validate_output(desagg_mastr_gdf, desagg_buildings_gdf)
911
912
    return drop_unallocated_gens(desagg_mastr_gdf), desagg_buildings_gdf
913
914
915
@timer_func
916
def grid_districts(
917
    epsg: int,
918
) -> gpd.GeoDataFrame:
919
    """
920
    Load mv grid district geo data from eGo^n Database as
921
    geopandas.GeoDataFrame.
922
923
    Parameters
924
    -----------
925
    epsg : int
926
        EPSG ID to use as CRS.
927
    Returns
928
    -------
929
    geopandas.GeoDataFrame
930
        GeoDataFrame containing mv grid district ID and geo shapes data.
931
    """
932
    gdf = db.select_geodataframe(
933
        """
934
        SELECT bus_id, geom
935
        FROM grid.egon_mv_grid_district
936
        ORDER BY bus_id
937
        """,
938
        index_col="bus_id",
939
        geom_col="geom",
940
        epsg=epsg,
941
    )
942
943
    gdf.index = gdf.index.astype(int)
944
945
    logger.debug("Grid districts loaded.")
946
947
    return gdf
948
949
950
def scenario_data(
951
    carrier: str = "solar_rooftop",
952
    scenario: str = "eGon2035",
953
) -> pd.DataFrame:
954
    """
955
    Get scenario capacity data from eGo^n Database.
956
957
    Parameters
958
    -----------
959
    carrier : str
960
        Carrier type to filter table by.
961
    scenario : str
962
        Scenario to filter table by.
963
    Returns
964
    -------
965
    geopandas.GeoDataFrame
966
        GeoDataFrame with scenario capacity data in GW.
967
    """
968
    with db.session_scope() as session:
969
        query = session.query(EgonScenarioCapacities).filter(
970
            EgonScenarioCapacities.carrier == carrier,
971
            EgonScenarioCapacities.scenario_name == scenario,
972
        )
973
974
    df = pd.read_sql(
975
        query.statement, query.session.bind, index_col="index"
976
    ).sort_index()
977
978
    logger.debug("Scenario capacity data loaded.")
979
980
    return df
981
982
983 View Code Duplication
class Vg250Lan(Base):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
984
    """
985
    Class definition of table boundaries.vg250_lan.
986
987
    """
988
    __tablename__ = "vg250_lan"
989
    __table_args__ = {"schema": "boundaries"}
990
991
    id = Column(BigInteger, primary_key=True, index=True)
992
    ade = Column(BigInteger)
993
    gf = Column(BigInteger)
994
    bsg = Column(BigInteger)
995
    ars = Column(String)
996
    ags = Column(String)
997
    sdv_ars = Column(String)
998
    gen = Column(String)
999
    bez = Column(String)
1000
    ibz = Column(BigInteger)
1001
    bem = Column(String)
1002
    nbd = Column(String)
1003
    sn_l = Column(String)
1004
    sn_r = Column(String)
1005
    sn_k = Column(String)
1006
    sn_v1 = Column(String)
1007
    sn_v2 = Column(String)
1008
    sn_g = Column(String)
1009
    fk_s3 = Column(String)
1010
    nuts = Column(String)
1011
    ars_0 = Column(String)
1012
    ags_0 = Column(String)
1013
    wsk = Column(String)
1014
    debkg_id = Column(String)
1015
    rs = Column(String)
1016
    sdv_rs = Column(String)
1017
    rs_0 = Column(String)
1018
    geometry = Column(Geometry(srid=EPSG), index=True)
1019
1020
1021
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
1022
    """
1023
    Get feder state data from eGo^n Database.
1024
1025
    Parameters
1026
    -----------
1027
    to_crs : pyproj.crs.crs.CRS
1028
        CRS to transform geometries to.
1029
    Returns
1030
    -------
1031
    geopandas.GeoDataFrame
1032
        GeoDataFrame with federal state data.
1033
    """
1034
    with db.session_scope() as session:
1035
        query = session.query(
1036
            Vg250Lan.id, Vg250Lan.nuts, Vg250Lan.geometry.label("geom")
1037
        )
1038
1039
        gdf = gpd.read_postgis(
1040
            query.statement, session.connection(), index_col="id"
1041
        ).to_crs(to_crs)
1042
1043
    logger.debug("Federal State data loaded.")
1044
1045
    return gdf
1046
1047
1048
@timer_func
1049
def overlay_grid_districts_with_counties(
1050
    mv_grid_district_gdf: gpd.GeoDataFrame,
1051
    federal_state_gdf: gpd.GeoDataFrame,
1052
) -> gpd.GeoDataFrame:
1053
    """
1054
    Calculate the intersections of mv grid districts and counties.
1055
1056
    Parameters
1057
    -----------
1058
    mv_grid_district_gdf : gpd.GeoDataFrame
1059
        GeoDataFrame containing mv grid district ID and geo shapes data.
1060
    federal_state_gdf : gpd.GeoDataFrame
1061
        GeoDataFrame with federal state data.
1062
    Returns
1063
    -------
1064
    geopandas.GeoDataFrame
1065
        GeoDataFrame containing OSM buildings data.
1066
    """
1067
    logger.debug(
1068
        "Calculating intersection overlay between mv grid districts and "
1069
        "counties. This may take a while..."
1070
    )
1071
1072
    gdf = gpd.overlay(
1073
        federal_state_gdf.to_crs(mv_grid_district_gdf.crs),
1074
        mv_grid_district_gdf.reset_index(),
1075
        how="intersection",
1076
        keep_geom_type=True,
1077
    )
1078
1079
    logger.debug("Done!")
1080
1081
    return gdf
1082
1083
1084
@timer_func
1085
def add_overlay_id_to_buildings(
1086
    buildings_gdf: gpd.GeoDataFrame,
1087
    grid_federal_state_gdf: gpd.GeoDataFrame,
1088
) -> gpd.GeoDataFrame:
1089
    """
1090
    Add information about overlay ID to buildings.
1091
1092
    Parameters
1093
    -----------
1094
    buildings_gdf : geopandas.GeoDataFrame
1095
        GeoDataFrame containing OSM buildings data.
1096
    grid_federal_state_gdf : geopandas.GeoDataFrame
1097
        GeoDataFrame with intersection shapes between counties and grid
1098
        districts.
1099
    Returns
1100
    -------
1101
    geopandas.GeoDataFrame
1102
        GeoDataFrame containing OSM buildings data with overlay ID added.
1103
    """
1104
    gdf = (
1105
        buildings_gdf.to_crs(grid_federal_state_gdf.crs)
1106
        .sjoin(
1107
            grid_federal_state_gdf,
1108
            how="left",
1109
            predicate="intersects",
1110
        )
1111
        .rename(columns={"index_right": "overlay_id"})
1112
    )
1113
1114
    logger.debug("Added overlay ID to OSM buildings.")
1115
1116
    return gdf
1117
1118
1119
def drop_buildings_outside_grids(
1120
    buildings_gdf: gpd.GeoDataFrame,
1121
) -> gpd.GeoDataFrame:
1122
    """
1123
    Drop all buildings outside of grid areas.
1124
1125
    Parameters
1126
    -----------
1127
    buildings_gdf : geopandas.GeoDataFrame
1128
        GeoDataFrame containing OSM buildings data.
1129
    Returns
1130
    -------
1131
    gepandas.GeoDataFrame
1132
        GeoDataFrame containing OSM buildings data
1133
        with buildings without an bus ID dropped.
1134
    """
1135
    gdf = buildings_gdf.loc[~buildings_gdf.bus_id.isna()]
1136
1137
    logger.debug(
1138
        f"{len(buildings_gdf) - len(gdf)} "
1139
        f"({(len(buildings_gdf) - len(gdf)) / len(buildings_gdf) * 100:g}%) "
1140
        f"of {len(buildings_gdf)} values are outside of the grid areas "
1141
        "and are therefore dropped."
1142
    )
1143
1144
    return gdf
1145
1146
1147
def cap_per_bus_id(
1148
    scenario: str,
1149
) -> pd.DataFrame:
1150
    """
1151
    Get table with total pv rooftop capacity per grid district.
1152
1153
    Parameters
1154
    -----------
1155
    scenario : str
1156
        Scenario name.
1157
    Returns
1158
    -------
1159
    pandas.DataFrame
1160
        DataFrame with total rooftop capacity per mv grid.
1161
    """
1162
    if "status" in scenario:
1163
        sources = config.datasets()["solar_rooftop"]["sources"]
1164
1165
        sql = f"""
1166
        SELECT bus_id, SUM(el_capacity) as capacity
1167
        FROM {sources['power_plants']['schema']}.{sources['power_plants']['table']}
1168
        WHERE carrier = 'solar_rooftop'
1169
        AND scenario = '{scenario}'
1170
        GROUP BY bus_id
1171
        """
1172
1173
        df = db.select_dataframe(sql, index_col="bus_id")
1174
1175
    else:
1176
        targets = config.datasets()["solar_rooftop"]["targets"]
1177
1178
        sql = f"""
1179
        SELECT bus as bus_id, control, p_nom as capacity
1180
        FROM {targets['generators']['schema']}.{targets['generators']['table']}
1181
        WHERE carrier = 'solar_rooftop'
1182
        AND scn_name = '{scenario}'
1183
        """
1184
1185
        df = db.select_dataframe(sql, index_col="bus_id")
1186
        df = df.loc[df.control != "Slack"]
1187
1188
    return df
1189
1190
1191
def determine_end_of_life_gens(
1192
    mastr_gdf: gpd.GeoDataFrame,
1193
    scenario_timestamp: pd.Timestamp,
1194
    pv_rooftop_lifetime: pd.Timedelta,
1195
) -> gpd.GeoDataFrame:
1196
    """
1197
    Determine if an old PV system has reached its end of life.
1198
1199
    Parameters
1200
    -----------
1201
    mastr_gdf : geopandas.GeoDataFrame
1202
        GeoDataFrame containing geocoded MaStR data.
1203
    scenario_timestamp : pandas.Timestamp
1204
        Timestamp at which the scenario takes place.
1205
    pv_rooftop_lifetime : pandas.Timedelta
1206
        Average expected lifetime of PV rooftop systems.
1207
    Returns
1208
    -------
1209
    geopandas.GeoDataFrame
1210
        GeoDataFrame containing geocoded MaStR data and info if the system
1211
        has reached its end of life.
1212
    """
1213
    before = mastr_gdf.capacity.sum()
1214
1215
    mastr_gdf = mastr_gdf.assign(
1216
        age=scenario_timestamp - mastr_gdf.commissioning_date
1217
    )
1218
1219
    mastr_gdf = mastr_gdf.assign(
1220
        end_of_life=pv_rooftop_lifetime < mastr_gdf.age
1221
    )
1222
1223
    after = mastr_gdf.loc[~mastr_gdf.end_of_life].capacity.sum()
1224
1225
    logger.debug(
1226
        f"Determined if pv rooftop systems reached their end of life.\nTotal "
1227
        f"capacity: {before}\nActive capacity: {after}"
1228
    )
1229
1230
    return mastr_gdf
1231
1232
1233
def calculate_max_pv_cap_per_building(
1234
    buildings_gdf: gpd.GeoDataFrame,
1235
    mastr_gdf: gpd.GeoDataFrame,
1236
    pv_cap_per_sq_m: float | int,
1237
    roof_factor: float | int,
1238
) -> gpd.GeoDataFrame:
1239
    """
1240
    Calculate the estimated maximum possible PV capacity per building.
1241
1242
    Parameters
1243
    -----------
1244
    buildings_gdf : geopandas.GeoDataFrame
1245
        GeoDataFrame containing OSM buildings data.
1246
    mastr_gdf : geopandas.GeoDataFrame
1247
        GeoDataFrame containing geocoded MaStR data.
1248
    pv_cap_per_sq_m : float, int
1249
        Average expected, installable PV capacity per square meter.
1250
    roof_factor : float, int
1251
        Average for PV usable roof area share.
1252
    Returns
1253
    -------
1254
    geopandas.GeoDataFrame
1255
        GeoDataFrame containing OSM buildings data with estimated maximum PV
1256
        capacity.
1257
    """
1258
    gdf = (
1259
        buildings_gdf.reset_index()
1260
        .rename(columns={"index": "id"})
1261
        .merge(
1262
            mastr_gdf[
1263
                [
1264
                    "capacity",
1265
                    "end_of_life",
1266
                    "building_id",
1267
                    "orientation_uniform",
1268
                    "orientation_primary",
1269
                    "orientation_primary_angle",
1270
                ]
1271
            ],
1272
            how="left",
1273
            left_on="id",
1274
            right_on="building_id",
1275
        )
1276
        .set_index("id")
1277
        .drop(columns="building_id")
1278
    )
1279
1280
    return gdf.assign(
1281
        max_cap=gdf.building_area.multiply(roof_factor * pv_cap_per_sq_m),
1282
        end_of_life=gdf.end_of_life.fillna(True).astype(bool),
1283
        bus_id=gdf.bus_id.astype(int),
1284
    )
1285
1286
1287
def calculate_building_load_factor(
1288
    mastr_gdf: gpd.GeoDataFrame,
1289
    buildings_gdf: gpd.GeoDataFrame,
1290
    rounding: int = 4,
1291
) -> gpd.GeoDataFrame:
1292
    """
1293
    Calculate the roof load factor from existing PV systems.
1294
1295
    Parameters
1296
    -----------
1297
    mastr_gdf : geopandas.GeoDataFrame
1298
        GeoDataFrame containing geocoded MaStR data.
1299
    buildings_gdf : geopandas.GeoDataFrame
1300
        GeoDataFrame containing OSM buildings data.
1301
    rounding : int
1302
        Rounding to use for load factor.
1303
    Returns
1304
    -------
1305
    geopandas.GeoDataFrame
1306
        GeoDataFrame containing geocoded MaStR data with calculated load
1307
        factor.
1308
    """
1309
    gdf = mastr_gdf.merge(
1310
        buildings_gdf[["max_cap", "building_area"]]
1311
        .loc[~buildings_gdf["max_cap"].isna()]
1312
        .reset_index(),
1313
        how="left",
1314
        left_on="building_id",
1315
        right_on="id",
1316
    ).set_index("id")
1317
1318
    return gdf.assign(load_factor=(gdf.capacity / gdf.max_cap).round(rounding))
1319
1320
1321
def get_probability_for_property(
1322
    mastr_gdf: gpd.GeoDataFrame,
1323
    cap_range: tuple[int | float, int | float],
1324
    prop: str,
1325
) -> tuple[np.array, np.array]:
1326
    """
1327
    Calculate the probability of the different options of a property of the
1328
    existing PV plants.
1329
1330
    Parameters
1331
    -----------
1332
    mastr_gdf : geopandas.GeoDataFrame
1333
        GeoDataFrame containing geocoded MaStR data.
1334
    cap_range : tuple(int, int)
1335
        Capacity range of PV plants to look at.
1336
    prop : str
1337
        Property to calculate probabilities for. String needs to be in columns
1338
        of mastr_gdf.
1339
    Returns
1340
    -------
1341
    tuple
1342
        numpy.array
1343
            Unique values of property.
1344
        numpy.array
1345
            Probabilties per unique value.
1346
    """
1347
    cap_range_gdf = mastr_gdf.loc[
1348
        (mastr_gdf.capacity > cap_range[0])
1349
        & (mastr_gdf.capacity <= cap_range[1])
1350
    ]
1351
1352
    if prop == "load_factor":
1353
        cap_range_gdf = cap_range_gdf.loc[cap_range_gdf[prop] <= 1]
1354
1355
    count = Counter(
1356
        cap_range_gdf[prop].loc[
1357
            ~cap_range_gdf[prop].isna()
1358
            & ~cap_range_gdf[prop].isnull()
1359
            & ~(cap_range_gdf[prop] == "None")
1360
        ]
1361
    )
1362
1363
    values = np.array(list(count.keys()))
1364
    probabilities = np.fromiter(count.values(), dtype=float)
1365
    probabilities = probabilities / np.sum(probabilities)
1366
1367
    return values, probabilities
1368
1369
1370
@timer_func
1371
def probabilities(
1372
    mastr_gdf: gpd.GeoDataFrame,
1373
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1374
    properties: list[str] | None = None,
1375
) -> dict:
1376
    """
1377
    Calculate the probability of the different options of properties of the
1378
    existing PV plants.
1379
1380
    Parameters
1381
    -----------
1382
    mastr_gdf : geopandas.GeoDataFrame
1383
        GeoDataFrame containing geocoded MaStR data.
1384
    cap_ranges : list(tuple(int, int))
1385
        List of capacity ranges to distinguish between. The first tuple should
1386
        start with a zero and the last one should end with infinite.
1387
    properties : list(str)
1388
        List of properties to calculate probabilities for. Strings need to be
1389
        in columns of mastr_gdf.
1390
    Returns
1391
    -------
1392
    dict
1393
        Dictionary with values and probabilities per capacity range.
1394
    """
1395
    if cap_ranges is None:
1396
        cap_ranges = [
1397
            (0, 30 / 10**3),
1398
            (30 / 10**3, 100 / 10**3),
1399
            (100 / 10**3, float("inf")),
1400
        ]
1401
    if properties is None:
1402
        properties = [
1403
            "orientation_uniform",
1404
            "orientation_primary",
1405
            "orientation_primary_angle",
1406
            "load_factor",
1407
        ]
1408
1409
    prob_dict = {}
1410
1411
    for cap_range in cap_ranges:
1412
        prob_dict[cap_range] = {
1413
            "values": {},
1414
            "probabilities": {},
1415
        }
1416
1417
        for prop in properties:
1418
            v, p = get_probability_for_property(
1419
                mastr_gdf,
1420
                cap_range,
1421
                prop,
1422
            )
1423
1424
            prob_dict[cap_range]["values"][prop] = v
1425
            prob_dict[cap_range]["probabilities"][prop] = p
1426
1427
    return prob_dict
1428
1429
1430
def cap_share_per_cap_range(
1431
    mastr_gdf: gpd.GeoDataFrame,
1432
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1433
) -> dict[tuple[int | float, int | float], float]:
1434
    """
1435
    Calculate the share of PV capacity from the total PV capacity within
1436
    capacity ranges.
1437
1438
    Parameters
1439
    -----------
1440
    mastr_gdf : geopandas.GeoDataFrame
1441
        GeoDataFrame containing geocoded MaStR data.
1442
    cap_ranges : list(tuple(int, int))
1443
        List of capacity ranges to distinguish between. The first tuple should
1444
        start with a zero and the last one should end with infinite.
1445
    Returns
1446
    -------
1447
    dict
1448
        Dictionary with share of PV capacity from the total PV capacity within
1449
        capacity ranges.
1450
    """
1451
    if cap_ranges is None:
1452
        cap_ranges = [
1453
            (0, 30 / 10**3),
1454
            (30 / 10**3, 100 / 10**3),
1455
            (100 / 10**3, float("inf")),
1456
        ]
1457
1458
    cap_share_dict = {}
1459
1460
    total_cap = mastr_gdf.capacity.sum()
1461
1462
    for cap_range in cap_ranges:
1463
        cap_share = (
1464
            mastr_gdf.loc[
1465
                (mastr_gdf.capacity > cap_range[0])
1466
                & (mastr_gdf.capacity <= cap_range[1])
1467
            ].capacity.sum()
1468
            / total_cap
1469
        )
1470
1471
        cap_share_dict[cap_range] = cap_share
1472
1473
    return cap_share_dict
1474
1475
1476
def mean_load_factor_per_cap_range(
1477
    mastr_gdf: gpd.GeoDataFrame,
1478
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1479
) -> dict[tuple[int | float, int | float], float]:
1480
    """
1481
    Calculate the mean roof load factor per capacity range from existing PV
1482
    plants.
1483
1484
    Parameters
1485
    -----------
1486
    mastr_gdf : geopandas.GeoDataFrame
1487
        GeoDataFrame containing geocoded MaStR data.
1488
    cap_ranges : list(tuple(int, int))
1489
        List of capacity ranges to distinguish between. The first tuple should
1490
        start with a zero and the last one should end with infinite.
1491
    Returns
1492
    -------
1493
    dict
1494
        Dictionary with mean roof load factor per capacity range.
1495
    """
1496
    if cap_ranges is None:
1497
        cap_ranges = [
1498
            (0, 30 / 10**3),
1499
            (30 / 10**3, 100 / 10**3),
1500
            (100 / 10**3, float("inf")),
1501
        ]
1502
1503
    load_factor_dict = {}
1504
1505
    for cap_range in cap_ranges:
1506
        load_factor = mastr_gdf.loc[
1507
            (mastr_gdf.load_factor <= 1)
1508
            & (mastr_gdf.capacity > cap_range[0])
1509
            & (mastr_gdf.capacity <= cap_range[1])
1510
        ].load_factor.mean()
1511
1512
        load_factor_dict[cap_range] = load_factor
1513
1514
    return load_factor_dict
1515
1516
1517
def building_area_range_per_cap_range(
1518
    mastr_gdf: gpd.GeoDataFrame,
1519
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
1520
    min_building_size: int | float = 10.0,
1521
    upper_quantile: float = 0.95,
1522
    lower_quantile: float = 0.05,
1523
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
1524
    """
1525
    Estimate normal building area range per capacity range.
1526
    Calculate the mean roof load factor per capacity range from existing PV
1527
    plants.
1528
1529
    Parameters
1530
    -----------
1531
    mastr_gdf : geopandas.GeoDataFrame
1532
        GeoDataFrame containing geocoded MaStR data.
1533
    cap_ranges : list(tuple(int, int))
1534
        List of capacity ranges to distinguish between. The first tuple should
1535
        start with a zero and the last one should end with infinite.
1536
    min_building_size : int, float
1537
        Minimal building size to consider for PV plants.
1538
    upper_quantile : float
1539
        Upper quantile to estimate maximum building size per capacity range.
1540
    lower_quantile : float
1541
        Lower quantile to estimate minimum building size per capacity range.
1542
    Returns
1543
    -------
1544
    dict
1545
        Dictionary with estimated normal building area range per capacity
1546
        range.
1547
    """
1548
    if cap_ranges is None:
1549
        cap_ranges = [
1550
            (0, 30 / 10**3),
1551
            (30 / 10**3, 100 / 10**3),
1552
            (100 / 10**3, float("inf")),
1553
        ]
1554
1555
    building_area_range_dict = {}
1556
1557
    n_ranges = len(cap_ranges)
1558
1559
    for count, cap_range in enumerate(cap_ranges):
1560
        cap_range_gdf = mastr_gdf.loc[
1561
            (mastr_gdf.capacity > cap_range[0])
1562
            & (mastr_gdf.capacity <= cap_range[1])
1563
        ]
1564
1565
        if count == 0:
1566
            building_area_range_dict[cap_range] = (
1567
                min_building_size,
1568
                cap_range_gdf.building_area.quantile(upper_quantile),
1569
            )
1570
        elif count == n_ranges - 1:
1571
            building_area_range_dict[cap_range] = (
1572
                cap_range_gdf.building_area.quantile(lower_quantile),
1573
                float("inf"),
1574
            )
1575
        else:
1576
            building_area_range_dict[cap_range] = (
1577
                cap_range_gdf.building_area.quantile(lower_quantile),
1578
                cap_range_gdf.building_area.quantile(upper_quantile),
1579
            )
1580
1581
    values = list(building_area_range_dict.values())
1582
1583
    building_area_range_normed_dict = {}
1584
1585
    for count, (cap_range, (min_area, max_area)) in enumerate(
1586
        building_area_range_dict.items()
1587
    ):
1588
        if count == 0:
1589
            building_area_range_normed_dict[cap_range] = (
1590
                min_area,
1591
                np.mean((values[count + 1][0], max_area)),
1592
            )
1593
        elif count == n_ranges - 1:
1594
            building_area_range_normed_dict[cap_range] = (
1595
                np.mean((values[count - 1][1], min_area)),
1596
                max_area,
1597
            )
1598
        else:
1599
            building_area_range_normed_dict[cap_range] = (
1600
                np.mean((values[count - 1][1], min_area)),
1601
                np.mean((values[count + 1][0], max_area)),
1602
            )
1603
1604
    return building_area_range_normed_dict
1605
1606
1607
@timer_func
1608
def desaggregate_pv_in_mv_grid(
1609
    buildings_gdf: gpd.GeoDataFrame,
1610
    pv_cap: float | int,
1611
    **kwargs,
1612
) -> gpd.GeoDataFrame:
1613
    """
1614
    Desaggregate PV capacity on buildings within a given grid district.
1615
1616
    Parameters
1617
    -----------
1618
    buildings_gdf : geopandas.GeoDataFrame
1619
        GeoDataFrame containing buildings within the grid district.
1620
    pv_cap : float, int
1621
        PV capacity to desaggregate.
1622
    Other Parameters
1623
    -----------
1624
    prob_dict : dict
1625
        Dictionary with values and probabilities per capacity range.
1626
    cap_share_dict : dict
1627
        Dictionary with share of PV capacity from the total PV capacity within
1628
        capacity ranges.
1629
    building_area_range_dict : dict
1630
        Dictionary with estimated normal building area range per capacity
1631
        range.
1632
    load_factor_dict : dict
1633
        Dictionary with mean roof load factor per capacity range.
1634
    seed : int
1635
        Seed to use for random operations with NumPy and pandas.
1636
    pv_cap_per_sq_m : float, int
1637
        Average expected, installable PV capacity per square meter.
1638
    Returns
1639
    -------
1640
    geopandas.GeoDataFrame
1641
        GeoDataFrame containing OSM building data with desaggregated PV
1642
        plants.
1643
    """
1644
    bus_id = int(buildings_gdf.bus_id.iat[0])
1645
1646
    rng = default_rng(seed=kwargs["seed"])
1647
    random_state = RandomState(seed=kwargs["seed"])
1648
1649
    results_df = pd.DataFrame(columns=buildings_gdf.columns)
1650
1651
    for cap_range, share in kwargs["cap_share_dict"].items():
1652
        pv_cap_range = pv_cap * share
1653
1654
        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]
1655
1656
        cap_range_buildings_gdf = buildings_gdf.loc[
1657
            ~buildings_gdf.index.isin(results_df.index)
1658
            & (buildings_gdf.building_area > b_area_min)
1659
            & (buildings_gdf.building_area <= b_area_max)
1660
        ]
1661
1662
        mean_load_factor = kwargs["load_factor_dict"][cap_range]
1663
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
1664
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
1665
            load_factor=np.nan,
1666
            capacity=np.nan,
1667
        )
1668
1669
        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()
1670
1671
        if total_mean_cap == 0:
1672
            logger.warning(
1673
                f"There are no matching roof for capacity range {cap_range} "
1674
                f"kW in grid {bus_id}. Using all buildings as fallback."
1675
            )
1676
1677
            cap_range_buildings_gdf = buildings_gdf.loc[
1678
                ~buildings_gdf.index.isin(results_df.index)
1679
            ]
1680
1681
            if len(cap_range_buildings_gdf) == 0:
1682
                logger.warning(
1683
                    "There are no roofes available for capacity range "
1684
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
1685
                )
1686
                cap_range_buildings_gdf = buildings_gdf.copy()
1687
1688
            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
1689
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
1690
                load_factor=np.nan,
1691
                capacity=np.nan,
1692
            )
1693
1694
            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()
1695
1696
        elif total_mean_cap < pv_cap_range:
1697
            logger.warning(
1698
                f"Average roof utilization of the roof area in grid {bus_id} "
1699
                f"and capacity range {cap_range} kW is not sufficient. The "
1700
                "roof utilization will be above average."
1701
            )
1702
1703
        frac = max(
1704
            pv_cap_range / total_mean_cap,
1705
            1 / len(cap_range_buildings_gdf),
1706
        )
1707
1708
        samples_gdf = cap_range_buildings_gdf.sample(
1709
            frac=min(1, frac),
1710
            random_state=random_state,
1711
        )
1712
1713
        cap_range_dict = kwargs["prob_dict"][cap_range]
1714
1715
        values_dict = cap_range_dict["values"]
1716
        p_dict = cap_range_dict["probabilities"]
1717
1718
        load_factors = rng.choice(
1719
            a=values_dict["load_factor"],
1720
            size=len(samples_gdf),
1721
            p=p_dict["load_factor"],
1722
        )
1723
1724
        samples_gdf = samples_gdf.assign(
1725
            load_factor=load_factors,
1726
            capacity=(
1727
                samples_gdf.building_area
1728
                * load_factors
1729
                * kwargs["pv_cap_per_sq_m"]
1730
            ).clip(lower=0.4),
1731
        )
1732
1733
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()
1734
1735
        samples_gdf = samples_gdf.assign(
1736
            capacity=(samples_gdf.capacity * missing_factor),
1737
            load_factor=(samples_gdf.load_factor * missing_factor),
1738
        )
1739
1740
        assert np.isclose(
1741
            samples_gdf.capacity.sum(),
1742
            pv_cap_range,
1743
            rtol=1e-03,
1744
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"
1745
1746
        results_df = pd.concat(
1747
            [
1748
                results_df,
1749
                samples_gdf,
1750
            ],
1751
        )
1752
1753
    total_missing_factor = pv_cap / results_df.capacity.sum()
1754
1755
    results_df = results_df.assign(
1756
        capacity=(results_df.capacity * total_missing_factor),
1757
    )
1758
1759
    assert np.isclose(
1760
        results_df.capacity.sum(),
1761
        pv_cap,
1762
        rtol=1e-03,
1763
    ), f"{results_df.capacity.sum()} != {pv_cap}"
1764
1765
    return gpd.GeoDataFrame(
1766
        results_df,
1767
        crs=samples_gdf.crs,
0 ignored issues
show
introduced by
The variable samples_gdf does not seem to be defined in case the for loop on line 1651 is not entered. Are you sure this can never be the case?
Loading history...
1768
        geometry="geom",
1769
    )
1770
1771
1772
@timer_func
1773
def desaggregate_pv(
1774
    buildings_gdf: gpd.GeoDataFrame,
1775
    cap_df: pd.DataFrame,
1776
    **kwargs,
1777
) -> gpd.GeoDataFrame:
1778
    """
1779
    Desaggregate PV capacity on buildings within a given grid district.
1780
1781
    Parameters
1782
    -----------
1783
    buildings_gdf : geopandas.GeoDataFrame
1784
        GeoDataFrame containing OSM buildings data.
1785
    cap_df : pandas.DataFrame
1786
        DataFrame with total rooftop capacity per mv grid.
1787
    Other Parameters
1788
    -----------
1789
    prob_dict : dict
1790
        Dictionary with values and probabilities per capacity range.
1791
    cap_share_dict : dict
1792
        Dictionary with share of PV capacity from the total PV capacity within
1793
        capacity ranges.
1794
    building_area_range_dict : dict
1795
        Dictionary with estimated normal building area range per capacity
1796
        range.
1797
    load_factor_dict : dict
1798
        Dictionary with mean roof load factor per capacity range.
1799
    seed : int
1800
        Seed to use for random operations with NumPy and pandas.
1801
    pv_cap_per_sq_m : float, int
1802
        Average expected, installable PV capacity per square meter.
1803
    Returns
1804
    -------
1805
    geopandas.GeoDataFrame
1806
        GeoDataFrame containing OSM building data with desaggregated PV
1807
        plants.
1808
    """
1809
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]
1810
1811
    building_bus_ids = set(buildings_gdf.bus_id)
1812
    cap_bus_ids = set(cap_df.index)
1813
1814
    logger.debug(
1815
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from "
1816
        f"capacity: {len(cap_bus_ids)}"
1817
    )
1818
1819
    if len(building_bus_ids) > len(cap_bus_ids):
1820
        missing = building_bus_ids - cap_bus_ids
1821
    else:
1822
        missing = cap_bus_ids - building_bus_ids
1823
1824
    logger.debug(str(missing))
1825
1826
    bus_ids = np.intersect1d(list(building_bus_ids), list(cap_bus_ids))
1827
1828
    # assert set(buildings_gdf.bus_id.unique()) == set(cap_df.index)
1829
1830
    for bus_id in bus_ids:
1831
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]
1832
1833
        pv_installed_gdf = buildings_grid_gdf.loc[
1834
            ~buildings_grid_gdf.end_of_life
1835
        ]
1836
1837
        pv_installed = pv_installed_gdf.capacity.sum()
1838
1839
        pot_buildings_gdf = buildings_grid_gdf.drop(
1840
            index=pv_installed_gdf.index
1841
        )
1842
1843
        if len(pot_buildings_gdf) == 0:
1844
            logger.error(
1845
                f"In grid {bus_id} there are no potential buildings to "
1846
                f"allocate PV capacity to. The grid is skipped. This message "
1847
                f"should only appear doing test runs with few buildings."
1848
            )
1849
1850
            continue
1851
1852
        pv_target = cap_df.at[bus_id, "capacity"]
1853
1854
        logger.debug(f"pv_target: {pv_target}")
1855
1856
        pv_missing = pv_target - pv_installed
1857
1858
        if pv_missing <= 0:
1859
            logger.warning(
1860
                f"In grid {bus_id} there is more PV installed "
1861
                f"({pv_installed: g} kW) in status Quo than allocated within "
1862
                f"the scenario ({pv_target: g} kW). "
1863
                f"No new generators are added."
1864
            )
1865
1866
            continue
1867
1868
        if pot_buildings_gdf.max_cap.sum() < pv_missing:
1869
            logger.error(
1870
                f"In grid {bus_id} there is less PV potential ("
1871
                f"{pot_buildings_gdf.max_cap.sum():g} MW) than allocated PV "
1872
                f"capacity ({pv_missing:g} MW). The average roof utilization "
1873
                f"will be very high."
1874
            )
1875
1876
        gdf = desaggregate_pv_in_mv_grid(
1877
            buildings_gdf=pot_buildings_gdf,
1878
            pv_cap=pv_missing,
1879
            **kwargs,
1880
        )
1881
1882
        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
1883
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
1884
        logger.debug(
1885
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
1886
        )
1887
1888
        if not np.isclose(
1889
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
1890
        ):
1891
            logger.warning(
1892
                f"The desired capacity and actual capacity in grid {bus_id} "
1893
                f"differ.\n"
1894
                f"Desired cap: {pv_target}\nActual cap: "
1895
                f"{gdf.capacity.sum() + pv_installed}"
1896
            )
1897
1898
        pre_cap = allocated_buildings_gdf.capacity.sum()
1899
        new_cap = gdf.capacity.sum()
1900
1901
        allocated_buildings_gdf = pd.concat(
1902
            [
1903
                allocated_buildings_gdf,
1904
                gdf,
1905
            ]
1906
        )
1907
1908
        total_cap = allocated_buildings_gdf.capacity.sum()
1909
1910
        assert np.isclose(pre_cap + new_cap, total_cap)
1911
1912
    logger.debug("Desaggregated scenario.")
1913
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
1914
    logger.debug(
1915
        f"Generator capacity: " f"{allocated_buildings_gdf.capacity.sum(): g}"
1916
    )
1917
1918
    return gpd.GeoDataFrame(
1919
        allocated_buildings_gdf,
1920
        crs=gdf.crs,
0 ignored issues
show
introduced by
The variable gdf does not seem to be defined for all execution paths.
Loading history...
1921
        geometry="geom",
1922
    )
1923
1924
1925
@timer_func
1926
def add_buildings_meta_data(
1927
    buildings_gdf: gpd.GeoDataFrame,
1928
    prob_dict: dict,
1929
    seed: int,
1930
) -> gpd.GeoDataFrame:
1931
    """
1932
    Randomly add additional metadata to desaggregated PV plants.
1933
1934
    Parameters
1935
    -----------
1936
    buildings_gdf : geopandas.GeoDataFrame
1937
        GeoDataFrame containing OSM buildings data with desaggregated PV
1938
        plants.
1939
    prob_dict : dict
1940
        Dictionary with values and probabilities per capacity range.
1941
    seed : int
1942
        Seed to use for random operations with NumPy and pandas.
1943
    Returns
1944
    -------
1945
    geopandas.GeoDataFrame
1946
        GeoDataFrame containing OSM building data with desaggregated PV
1947
        plants.
1948
    """
1949
    rng = default_rng(seed=seed)
1950
    buildings_gdf = buildings_gdf.reset_index().rename(
1951
        columns={
1952
            "index": "building_id",
1953
        }
1954
    )
1955
1956
    for (min_cap, max_cap), cap_range_prob_dict in prob_dict.items():
1957
        cap_range_gdf = buildings_gdf.loc[
1958
            (buildings_gdf.capacity >= min_cap)
1959
            & (buildings_gdf.capacity < max_cap)
1960
        ]
1961
1962
        for key, values in cap_range_prob_dict["values"].items():
1963
            if key == "load_factor":
1964
                continue
1965
1966
            gdf = cap_range_gdf.loc[
1967
                cap_range_gdf[key].isna()
1968
                | cap_range_gdf[key].isnull()
1969
                | (cap_range_gdf[key] == "None")
1970
            ]
1971
1972
            key_vals = rng.choice(
1973
                a=values,
1974
                size=len(gdf),
1975
                p=cap_range_prob_dict["probabilities"][key],
1976
            )
1977
1978
            buildings_gdf.loc[gdf.index, key] = key_vals
1979
1980
    return buildings_gdf
1981
1982
1983
def add_commissioning_date(
1984
    buildings_gdf: gpd.GeoDataFrame,
1985
    start: pd.Timestamp,
1986
    end: pd.Timestamp,
1987
    seed: int,
1988
):
1989
    """
1990
    Randomly and linear add start-up date to new pv generators.
1991
1992
    Parameters
1993
    ----------
1994
    buildings_gdf : geopandas.GeoDataFrame
1995
        GeoDataFrame containing OSM buildings data with desaggregated PV
1996
        plants.
1997
    start : pandas.Timestamp
1998
        Minimum Timestamp to use.
1999
    end : pandas.Timestamp
2000
        Maximum Timestamp to use.
2001
    seed : int
2002
        Seed to use for random operations with NumPy and pandas.
2003
    Returns
2004
    -------
2005
    geopandas.GeoDataFrame
2006
        GeoDataFrame containing OSM buildings data with start-up date added.
2007
    """
2008
    rng = default_rng(seed=seed)
2009
2010
    date_range = pd.date_range(start=start, end=end, freq="1D")
2011
2012
    return buildings_gdf.assign(
2013
        commissioning_date=rng.choice(date_range, size=len(buildings_gdf))
2014
    )
2015
2016
2017
@timer_func
2018
def allocate_scenarios(
2019
    mastr_gdf: gpd.GeoDataFrame,
2020
    valid_buildings_gdf: gpd.GeoDataFrame,
2021
    last_scenario_gdf: gpd.GeoDataFrame,
2022
    scenario: str,
2023
):
2024
    """
2025
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.
2026
2027
    Parameters
2028
    ----------
2029
    mastr_gdf : geopandas.GeoDataFrame
2030
        GeoDataFrame containing geocoded MaStR data.
2031
    valid_buildings_gdf : geopandas.GeoDataFrame
2032
        GeoDataFrame containing OSM buildings data.
2033
    last_scenario_gdf : geopandas.GeoDataFrame
2034
        GeoDataFrame containing OSM buildings matched with pv generators from
2035
        temporally preceding scenario.
2036
    scenario : str
2037
        Scenario to desaggrgate and allocate.
2038
    Returns
2039
    -------
2040
    tuple
2041
        geopandas.GeoDataFrame
2042
            GeoDataFrame containing OSM buildings matched with pv generators.
2043
        pandas.DataFrame
2044
            DataFrame containing pv rooftop capacity per grid id.
2045
    """
2046
    cap_per_bus_id_df = cap_per_bus_id(scenario)
2047
2048
    if cap_per_bus_id_df.empty:
2049
        print(f"No PV rooftop in scenario {scenario}")
2050
        return
2051
2052
    logger.debug(
2053
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
2054
    )
2055
2056
    last_scenario_gdf = determine_end_of_life_gens(
2057
        last_scenario_gdf,
2058
        SCENARIO_TIMESTAMP[scenario],
2059
        PV_ROOFTOP_LIFETIME,
2060
    )
2061
2062
    buildings_gdf = calculate_max_pv_cap_per_building(
2063
        valid_buildings_gdf,
2064
        last_scenario_gdf,
2065
        PV_CAP_PER_SQ_M,
2066
        ROOF_FACTOR,
2067
    )
2068
2069
    mastr_gdf = calculate_building_load_factor(
2070
        mastr_gdf,
2071
        buildings_gdf,
2072
    )
2073
2074
    probabilities_dict = probabilities(
2075
        mastr_gdf,
2076
        cap_ranges=CAP_RANGES,
2077
    )
2078
2079
    cap_share_dict = cap_share_per_cap_range(
2080
        mastr_gdf,
2081
        cap_ranges=CAP_RANGES,
2082
    )
2083
2084
    load_factor_dict = mean_load_factor_per_cap_range(
2085
        mastr_gdf,
2086
        cap_ranges=CAP_RANGES,
2087
    )
2088
2089
    building_area_range_dict = building_area_range_per_cap_range(
2090
        mastr_gdf,
2091
        cap_ranges=CAP_RANGES,
2092
        min_building_size=MIN_BUILDING_SIZE,
2093
        upper_quantile=UPPER_QUANTILE,
2094
        lower_quantile=LOWER_QUANTILE,
2095
    )
2096
2097
    allocated_buildings_gdf = desaggregate_pv(
2098
        buildings_gdf=buildings_gdf,
2099
        cap_df=cap_per_bus_id_df,
2100
        prob_dict=probabilities_dict,
2101
        cap_share_dict=cap_share_dict,
2102
        building_area_range_dict=building_area_range_dict,
2103
        load_factor_dict=load_factor_dict,
2104
        seed=SEED,
2105
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
2106
    )
2107
2108
    allocated_buildings_gdf = allocated_buildings_gdf.assign(scenario=scenario)
2109
2110
    meta_buildings_gdf = frame_to_numeric(
2111
        add_buildings_meta_data(
2112
            allocated_buildings_gdf,
2113
            probabilities_dict,
2114
            SEED,
2115
        )
2116
    )
2117
2118
    return (
2119
        add_commissioning_date(
2120
            meta_buildings_gdf,
2121
            start=last_scenario_gdf.commissioning_date.max(),
2122
            end=SCENARIO_TIMESTAMP[scenario],
2123
            seed=SEED,
2124
        ),
2125
        cap_per_bus_id_df,
2126
    )
2127
2128
2129
class EgonPowerPlantPvRoofBuilding(Base):
2130
    """
2131
    Class definition of table supply.egon_power_plants_pv_roof_building.
2132
2133
    """
2134
    __tablename__ = "egon_power_plants_pv_roof_building"
2135
    __table_args__ = {"schema": "supply"}
2136
2137
    index = Column(Integer, primary_key=True, index=True)
2138
    scenario = Column(String)
2139
    bus_id = Column(Integer, nullable=True)
2140
    building_id = Column(Integer)
2141
    gens_id = Column(String, nullable=True)
2142
    capacity = Column(Float)
2143
    orientation_uniform = Column(Float)
2144
    orientation_primary = Column(String)
2145
    orientation_primary_angle = Column(String)
2146
    voltage_level = Column(Integer)
2147
    weather_cell_id = Column(Integer)
2148
2149
2150
def add_metadata():
2151
    schema = "supply"
2152
    table = "egon_power_plants_pv_roof_building"
2153
    name = f"{schema}.{table}"
2154
    deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"]
2155
    deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][
2156
        "zenodo"
2157
    ]["deposit_id"]
2158
2159
    contris = contributors(["kh", "kh"])
2160
2161
    contris[0]["date"] = "2023-03-16"
2162
2163
    contris[0]["object"] = "metadata"
2164
    contris[1]["object"] = "dataset"
2165
2166
    contris[0]["comment"] = "Add metadata to dataset."
2167
    contris[1]["comment"] = "Add workflow to generate dataset."
2168
2169
    meta = {
2170
        "name": name,
2171
        "title": "eGon power plants rooftop solar",
2172
        "id": "WILL_BE_SET_AT_PUBLICATION",
2173
        "description": (
2174
            "eGon power plants rooftop solar systems allocated to buildings"
2175
        ),
2176
        "language": "en-US",
2177
        "keywords": ["photovoltaik", "solar", "pv", "mastr", "status quo"],
2178
        "publicationDate": datetime.date.today().isoformat(),
2179
        "context": context(),
2180
        "spatial": {
2181
            "location": "none",
2182
            "extent": "Germany",
2183
            "resolution": "building",
2184
        },
2185
        "temporal": {
2186
            "referenceDate": (
2187
                config.datasets()["mastr_new"]["egon2021_date_max"].split(" ")[
2188
                    0
2189
                ]
2190
            ),
2191
            "timeseries": {},
2192
        },
2193
        "sources": [
2194
            {
2195
                "title": "Data bundle for egon-data",
2196
                "description": (
2197
                    "Data bundle for egon-data: A transparent and "
2198
                    "reproducible data processing pipeline for energy "
2199
                    "system modeling"
2200
                ),
2201
                "path": (
2202
                    "https://zenodo.org/record/"
2203
                    f"{deposit_id_data_bundle}#.Y_dWM4CZMVM"
2204
                ),
2205
                "licenses": [license_dedl(attribution="© Cußmann, Ilka")],
2206
            },
2207
            {
2208
                "title": ("open-MaStR power unit registry for eGo^n project"),
2209
                "description": (
2210
                    "Data from Marktstammdatenregister (MaStR) data using "
2211
                    "the data dump from 2022-11-17 for eGon-data."
2212
                ),
2213
                "path": (
2214
                    f"https://zenodo.org/record/{deposit_id_mastr}"
2215
                ),
2216
                "licenses": [license_dedl(attribution="© Amme, Jonathan")],
2217
            },
2218
            sources()["openstreetmap"],
2219
            sources()["era5"],
2220
            sources()["vg250"],
2221
            sources()["egon-data"],
2222
        ],
2223
        "licenses": [license_odbl("© eGon development team")],
2224
        "contributors": contris,
2225
        "resources": [
2226
            {
2227
                "profile": "tabular-data-resource",
2228
                "name": name,
2229
                "path": "None",
2230
                "format": "PostgreSQL",
2231
                "encoding": "UTF-8",
2232
                "schema": {
2233
                    "fields": generate_resource_fields_from_db_table(
2234
                        schema,
2235
                        table,
2236
                    ),
2237
                    "primaryKey": "index",
2238
                },
2239
                "dialect": {"delimiter": "", "decimalSeparator": ""},
2240
            }
2241
        ],
2242
        "review": {"path": "", "badge": ""},
2243
        "metaMetadata": meta_metadata(),
2244
        "_comment": {
2245
            "metadata": (
2246
                "Metadata documentation and explanation (https://github."
2247
                "com/OpenEnergyPlatform/oemetadata/blob/master/metadata/"
2248
                "v141/metadata_key_description.md)"
2249
            ),
2250
            "dates": (
2251
                "Dates and time must follow the ISO8601 including time "
2252
                "zone (YYYY-MM-DD or YYYY-MM-DDThh:mm:ss±hh)"
2253
            ),
2254
            "units": "Use a space between numbers and units (100 m)",
2255
            "languages": (
2256
                "Languages must follow the IETF (BCP47) format (en-GB, "
2257
                "en-US, de-DE)"
2258
            ),
2259
            "licenses": (
2260
                "License name must follow the SPDX License List "
2261
                "(https://spdx.org/licenses/)"
2262
            ),
2263
            "review": (
2264
                "Following the OEP Data Review (https://github.com/"
2265
                "OpenEnergyPlatform/data-preprocessing/wiki)"
2266
            ),
2267
            "none": "If not applicable use (none)",
2268
        },
2269
    }
2270
2271
    dialect = get_dialect(f"oep-v{meta_metadata()['metadataVersion'][4:7]}")()
2272
2273
    meta = dialect.compile_and_render(dialect.parse(json.dumps(meta)))
2274
2275
    db.submit_comment(
2276
        f"'{json.dumps(meta)}'",
2277
        schema,
2278
        table,
2279
    )
2280
2281
2282
def create_scenario_table(buildings_gdf):
2283
    """Create mapping table pv_unit <-> building for scenario"""
2284
    EgonPowerPlantPvRoofBuilding.__table__.drop(bind=engine, checkfirst=True)
2285
    EgonPowerPlantPvRoofBuilding.__table__.create(bind=engine, checkfirst=True)
2286
2287
    buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql(
2288
        name=EgonPowerPlantPvRoofBuilding.__table__.name,
2289
        schema=EgonPowerPlantPvRoofBuilding.__table__.schema,
2290
        con=db.engine(),
2291
        if_exists="append",
2292
        index=False,
2293
    )
2294
2295
    add_metadata()
2296
2297
2298
def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
2299
    sql = """
2300
    SELECT building_id, zensus_population_id
2301
    FROM boundaries.egon_map_zensus_mvgd_buildings
2302
    """
2303
2304
    buildings_gdf = buildings_gdf.merge(
2305
        right=db.select_dataframe(sql).drop_duplicates(subset="building_id"),
2306
        how="left",
2307
        on="building_id",
2308
    )
2309
2310
    sql = """
2311
    SELECT zensus_population_id, w_id as weather_cell_id
2312
    FROM boundaries.egon_map_zensus_weather_cell
2313
    """
2314
2315
    buildings_gdf = buildings_gdf.merge(
2316
        right=db.select_dataframe(sql).drop_duplicates(
2317
            subset="zensus_population_id"
2318
        ),
2319
        how="left",
2320
        on="zensus_population_id",
2321
    )
2322
2323
    if buildings_gdf.weather_cell_id.isna().any():
2324
        missing = buildings_gdf.loc[
2325
            buildings_gdf.weather_cell_id.isna(), "building_id"
2326
        ].tolist()
2327
2328
        raise ValueError(
2329
            f"Following buildings don't have a weather cell id: {missing}"
2330
        )
2331
2332
    return buildings_gdf
2333
2334
2335
def add_bus_ids_sq(
2336
    buildings_gdf: gpd.GeoDataFrame,
2337
) -> gpd.GeoDataFrame:
2338
    """Add bus ids for status_quo units
2339
2340
    Parameters
2341
    -----------
2342
    buildings_gdf : geopandas.GeoDataFrame
2343
        GeoDataFrame containing OSM buildings data with desaggregated PV
2344
        plants.
2345
    Returns
2346
    -------
2347
    geopandas.GeoDataFrame
2348
        GeoDataFrame containing OSM building data with bus_id per
2349
        generator.
2350
    """
2351
    grid_districts_gdf = grid_districts(EPSG)
2352
2353
    mask = buildings_gdf.scenario == "status_quo"
2354
2355
    buildings_gdf.loc[mask, "bus_id"] = (
2356
        buildings_gdf.loc[mask]
2357
        .sjoin(grid_districts_gdf, how="left")
2358
        .index_right
2359
    )
2360
2361
    return buildings_gdf
2362
2363
2364 View Code Duplication
def infer_voltage_level(
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
2365
    units_gdf: gpd.GeoDataFrame,
2366
) -> gpd.GeoDataFrame:
2367
    """
2368
    Infer nan values in voltage level derived from generator capacity to
2369
    the power plants.
2370
2371
    Parameters
2372
    -----------
2373
    units_gdf : geopandas.GeoDataFrame
2374
        GeoDataFrame containing units with voltage levels from MaStR
2375
    Returnsunits_gdf: gpd.GeoDataFrame
2376
    -------
2377
    geopandas.GeoDataFrame
2378
        GeoDataFrame containing units all having assigned a voltage level.
2379
    """
2380
2381
    def voltage_levels(p: float) -> int:
2382
        if p <= 0.1:
2383
            return 7
2384
        elif p <= 0.2:
2385
            return 6
2386
        elif p <= 5.5:
2387
            return 5
2388
        elif p <= 20:
2389
            return 4
2390
        elif p <= 120:
2391
            return 3
2392
        return 1
2393
2394
    units_gdf["voltage_level_inferred"] = False
2395
    mask = units_gdf.voltage_level.isna()
2396
    units_gdf.loc[mask, "voltage_level_inferred"] = True
2397
    units_gdf.loc[mask, "voltage_level"] = units_gdf.loc[mask].capacity.apply(
2398
        voltage_levels
2399
    )
2400
2401
    return units_gdf
2402
2403
2404
def pv_rooftop_to_buildings():
2405
    """Main script, executed as task"""
2406
2407
    mastr_gdf = load_mastr_data()
2408
2409
    status_quo = "status2023" # FIXME: Hard coded
2410
2411
    ts = pd.Timestamp(
2412
        config.datasets()["mastr_new"][f"{status_quo}_date_max"], tz="UTC"
2413
    )
2414
2415
    mastr_gdf = mastr_gdf.loc[
2416
        mastr_gdf.commissioning_date <= ts
2417
    ]
2418
2419
    buildings_gdf = load_building_data()
2420
2421
    desagg_mastr_gdf, desagg_buildings_gdf = allocate_to_buildings(
2422
        mastr_gdf, buildings_gdf
2423
    )
2424
2425
    all_buildings_gdf = (
2426
        desagg_mastr_gdf.assign(scenario=status_quo)
2427
        .reset_index()
2428
        .rename(columns={"geometry": "geom"})
2429
        .set_geometry("geom")
2430
    )
2431
2432
    scenario_buildings_gdf = all_buildings_gdf.copy()
2433
2434
    cap_per_bus_id_df = pd.DataFrame()
2435
2436
    for scenario in SCENARIOS:
2437
        if scenario == status_quo:
2438
            continue
2439
        elif "status" in scenario:
2440
            ts = pd.Timestamp(
2441
                config.datasets()["mastr_new"][f"{scenario}_date_max"], tz="UTC"
2442
            )
2443
2444
            scenario_buildings_gdf = scenario_buildings_gdf.loc[
2445
                scenario_buildings_gdf.commissioning_date <= ts
2446
            ]
2447
2448
        else:
2449
            logger.debug(f"Desaggregating scenario {scenario}.")
2450
2451
            (
2452
                scenario_buildings_gdf,
2453
                cap_per_bus_id_scenario_df,
2454
            ) = allocate_scenarios(  # noqa: F841
2455
                desagg_mastr_gdf,
2456
                desagg_buildings_gdf,
2457
                scenario_buildings_gdf,
2458
                scenario,
2459
            )
2460
2461
            cap_per_bus_id_df = pd.concat(
2462
                [cap_per_bus_id_df, cap_per_bus_id_scenario_df]
2463
            )
2464
2465
        all_buildings_gdf = gpd.GeoDataFrame(
2466
            pd.concat(
2467
                [all_buildings_gdf, scenario_buildings_gdf], ignore_index=True
2468
            ),
2469
            crs=scenario_buildings_gdf.crs,
2470
            geometry="geom",
2471
        )
2472
2473
    # add weather cell
2474
    all_buildings_gdf = add_weather_cell_id(all_buildings_gdf)
2475
2476
    # add bus IDs for status quo scenario
2477
    all_buildings_gdf = add_bus_ids_sq(all_buildings_gdf)
2478
2479
    # export scenario
2480
    create_scenario_table(infer_voltage_level(all_buildings_gdf))
2481