Passed
Pull Request — dev (#1181)
by
unknown
05:34
created

pv_rooftop_to_buildings()   B

Complexity

Conditions 4

Size

Total Lines 77
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 44
dl 0
loc 77
rs 8.824
c 0
b 0
f 0
cc 4
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""
2
Distribute MaStR PV rooftop capacities to OSM and synthetic buildings. Generate
3
new PV rooftop generators for scenarios eGon2035 and eGon100RE.
4
5
See documentation section :ref:`pv-rooftop-ref` for more information.
6
7
"""
8
from __future__ import annotations
9
10
from collections import Counter
11
from functools import wraps
12
from time import perf_counter
13
import datetime
14
import json
15
16
from geoalchemy2 import Geometry
17
from loguru import logger
18
from numpy.random import RandomState, default_rng
19
from omi.dialects import get_dialect
20
from pyproj.crs.crs import CRS
21
from sqlalchemy import BigInteger, Column, Float, Integer, String
22
from sqlalchemy.dialects.postgresql import HSTORE
23
from sqlalchemy.ext.declarative import declarative_base
24
import geopandas as gpd
25
import numpy as np
26
import pandas as pd
27
28
from egon.data import config, db
29
from egon.data.datasets.electricity_demand_timeseries.hh_buildings import (
30
    OsmBuildingsSynthetic,
31
)
32
from egon.data.datasets.power_plants.mastr_db_classes import EgonPowerPlantsPv
33
from egon.data.datasets.scenario_capacities import EgonScenarioCapacities
34
from egon.data.datasets.scenario_parameters import get_scenario_year
35
from egon.data.datasets.zensus_vg250 import Vg250Gem
36
from egon.data.metadata import (
37
    context,
38
    contributors,
39
    generate_resource_fields_from_db_table,
40
    license_dedl,
41
    license_odbl,
42
    meta_metadata,
43
    meta_metadata,
44
    sources,
45
)
46
47
# Module-level setup: shared DB engine, SQLAlchemy declarative base and the
# RNG seed taken from the egon-data CLI configuration.
engine = db.engine()
Base = declarative_base()
SEED = int(config.settings()["egon-data"]["--random-seed"])

# TODO: move to yml
# Column of the MaStR table used as DataFrame index (generator identifier).
MASTR_INDEX_COL = "gens_id"

EPSG = 4326  # geographic CRS (WGS 84)
SRID = 3035  # SRID of the building geometry columns (ETRS89 / LAEA)

# data cleaning: plausibility bounds for PV rooftop capacity in MW
# (23.5 MW upper bound — Germany's largest rooftop PV, see clean_mastr_data;
# 0.1 kW lower bound)
MAX_REALISTIC_PV_CAP = 23500 / 10**3
MIN_REALISTIC_PV_CAP = 0.1 / 10**3

# show additional logging information
VERBOSE = False

# Number of quantiles used to classify generators and buildings by size
# (see sort_and_qcut_df / allocate_pv)
Q = 5

# Scenario Data
SCENARIOS = config.settings()["egon-data"]["--scenarios"]
# Reference timestamp per scenario — presumably used as a commissioning
# cut-off date; confirm against usage further down the file.
SCENARIO_TIMESTAMP = {
    "status2019": pd.Timestamp("2020-01-01", tz="UTC"),
    "status2023": pd.Timestamp("2024-01-01", tz="UTC"),
    "eGon2035": pd.Timestamp("2035-01-01", tz="UTC"),
    "eGon100RE": pd.Timestamp("2050-01-01", tz="UTC"),
}
# Assumed operational lifetime of a PV rooftop system (20 years).
PV_ROOFTOP_LIFETIME = pd.Timedelta(20 * 365, unit="D")

# Example Modul Trina Vertex S TSM-400DE09M.08 400 Wp
# https://www.photovoltaik4all.de/media/pdf/92/64/68/Trina_Datasheet_VertexS_DE09-08_2021_A.pdf
MODUL_CAP = 0.4 / 10**3  # MWp
MODUL_SIZE = 1.096 * 1.754  # m²
PV_CAP_PER_SQ_M = MODUL_CAP / MODUL_SIZE  # MWp per m² of module area

# Estimation of usable roof area
# Factor for the conversion of building area to roof area
# estimation mean roof pitch: 35°
# estimation usable roof share: 80%
# estimation that only the south side of the building is used for pv
# see https://mediatum.ub.tum.de/doc/%20969497/969497.pdf
# AREA_FACTOR = 1.221
# USABLE_ROOF_SHARE = 0.8
# SOUTH_SHARE = 0.5
# ROOF_FACTOR = AREA_FACTOR * USABLE_ROOF_SHARE * SOUTH_SHARE
ROOF_FACTOR = 0.5

# Capacity class boundaries in MW (0-30 kW, 30-100 kW, >100 kW) —
# presumably used to bucket generators; confirm against usage.
CAP_RANGES = [
    (0, 30 / 10**3),
    (30 / 10**3, 100 / 10**3),
    (100 / 10**3, float("inf")),
]

# Minimum building footprint considered, presumably in m² — confirm.
MIN_BUILDING_SIZE = 10.0
UPPER_QUANTILE = 0.95
LOWER_QUANTILE = 0.05

# Columns written to the result table.
COLS_TO_EXPORT = [
    "scenario",
    "bus_id",
    "building_id",
    "gens_id",
    "capacity",
    "orientation_uniform",
    "orientation_primary",
    "orientation_primary_angle",
    "voltage_level",
    "weather_cell_id",
]

# TODO
INCLUDE_SYNTHETIC_BUILDINGS = True
ONLY_BUILDINGS_WITH_DEMAND = True
TEST_RUN = False
122
123
124
def timer_func(func):
    """Decorator that logs the wall-clock runtime of *func* at debug level."""

    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        started = perf_counter()
        result = func(*args, **kwargs)
        elapsed = perf_counter() - started
        # Keep the exact message format so log parsing stays stable.
        logger.debug(
            f"Function {func.__name__} took {elapsed:.4f} seconds."
        )
        return result

    return timeit_wrapper
137
138
139
@timer_func
def mastr_data(
    index_col: str | int | list[str] | list[int],
) -> gpd.GeoDataFrame:
    """
    Read MaStR PV data from the database.

    Only plants that are in operation and mounted on buildings
    (rooftop / facade) are selected.

    Parameters
    -----------
    index_col : str, int or list of str or int
        Column(s) to use as the row labels of the DataFrame.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data.
    """
    with db.session_scope() as session:
        query = session.query(EgonPowerPlantsPv).filter(
            EgonPowerPlantsPv.status == "InBetrieb",
            EgonPowerPlantsPv.site_type
            == ("Bauliche Anlagen (Hausdach, Gebäude und Fassade)"),
        )

    mastr_gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col=index_col
    )
    # The surrogate `id` column is redundant once the MaStR index is set.
    mastr_gdf = mastr_gdf.drop(columns="id")

    logger.debug("MaStR data loaded.")

    return mastr_gdf
169
170
171
@timer_func
def clean_mastr_data(
    mastr_gdf: gpd.GeoDataFrame,
    max_realistic_pv_cap: int | float,
    min_realistic_pv_cap: int | float,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Clean the MaStR data from implausible data.

    * Drop MaStR ID duplicates.
    * Drop generators with implausible capacities.
    * Fill missing commissioning dates by sampling from the observed dates.

    Parameters
    -----------
    mastr_gdf : pandas.DataFrame
        DataFrame containing MaStR data.
    max_realistic_pv_cap : int or float
        Maximum capacity, which is considered to be realistic.
    min_realistic_pv_cap : int or float
        Minimum capacity, which is considered to be realistic.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    pandas.DataFrame
        DataFrame containing cleaned MaStR data.
    """
    init_len = len(mastr_gdf)

    # drop duplicates
    mastr_gdf = mastr_gdf.loc[~mastr_gdf.index.duplicated()]

    # drop generators without any capacity info
    # and capacity of zero
    # and if the capacity is > 23.5 MW, because
    # Germanies largest rooftop PV is 23 MW
    # https://www.iwr.de/news/groesste-pv-dachanlage-europas-wird-in-sachsen-anhalt-gebaut-news37379
    mastr_gdf = mastr_gdf.loc[
        ~mastr_gdf.capacity.isna()
        & (mastr_gdf.capacity <= max_realistic_pv_cap)
        & (mastr_gdf.capacity > min_realistic_pv_cap)
    ]

    # get consistent start-up date
    # randomly and weighted fill missing start-up dates:
    # draw replacement dates from the pool of observed dates, so the filled
    # values follow the empirical distribution of known commissioning dates
    pool = mastr_gdf.loc[
        ~mastr_gdf.commissioning_date.isna()
    ].commissioning_date.to_numpy()

    size = len(mastr_gdf) - len(pool)

    if size > 0:
        rng = default_rng(seed=seed)

        # NOTE(review): replace=False requires size <= len(pool); this fails
        # if more dates are missing than present — confirm that cannot occur.
        choice = rng.choice(
            pool,
            size=size,
            replace=False,
        )

        # Row-wise re-assignment: rows with missing dates are replaced by
        # themselves with only commissioning_date filled from `choice`.
        mastr_gdf.loc[mastr_gdf.commissioning_date.isna()] = mastr_gdf.loc[
            mastr_gdf.commissioning_date.isna()
        ].assign(commissioning_date=choice)

        logger.info(
            f"Randomly and weigthed added start-up date to {size} generators."
        )

    # Normalize all commissioning dates to timezone-aware UTC timestamps.
    mastr_gdf = mastr_gdf.assign(
        commissioning_date=pd.to_datetime(
            mastr_gdf.commissioning_date, utc=True
        )
    )

    end_len = len(mastr_gdf)
    logger.debug(
        f"Dropped {init_len - end_len} "
        f"({((init_len - end_len) / init_len) * 100:g}%)"
        f" of {init_len} rows from MaStR DataFrame."
    )

    return mastr_gdf
254
255
256
@timer_func
def municipality_data() -> gpd.GeoDataFrame:
    """
    Fetch municipality boundaries from the eGo^n database.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with municipality data, indexed by AGS.
    """
    with db.session_scope() as session:
        query = session.query(Vg250Gem.ags, Vg250Gem.geometry.label("geom"))

    muns_gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col="ags"
    )

    return muns_gdf
271
272
273
@timer_func
def add_ags_to_gens(
    mastr_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Attach the municipality (AGS) ID to each generator via a spatial join.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data with AGS ID added.
    """
    joined_gdf = mastr_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # sjoin names the municipality index "index_right"; expose it as "ags".
    return joined_gdf.rename(columns={"index_right": "ags"})
297
298
299
def drop_gens_outside_muns(
    mastr_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all generators outside of municipalities.

    Generators that received no AGS ID in the spatial join lie outside
    every municipality boundary and are removed.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame with valid and cleaned MaStR data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with generators without an AGS ID dropped.
    """
    inside_gdf = mastr_gdf.loc[~mastr_gdf.ags.isna()]

    dropped = len(mastr_gdf) - len(inside_gdf)
    logger.debug(
        f"{dropped} ("
        f"{dropped / len(mastr_gdf) * 100:g}%)"
        f" of {len(mastr_gdf)} values are outside of the municipalities"
        " and are therefore dropped."
    )

    return inside_gdf
324
325
326
def load_mastr_data():
    """Read PV rooftop data from MaStR CSV
    Note: the source will be replaced as soon as the MaStR data is available
    in DB.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    """
    # Load raw generators, then clean, then regionalize by municipality.
    raw_gdf = mastr_data(MASTR_INDEX_COL)

    cleaned_gdf = clean_mastr_data(
        raw_gdf,
        max_realistic_pv_cap=MAX_REALISTIC_PV_CAP,
        min_realistic_pv_cap=MIN_REALISTIC_PV_CAP,
        seed=SEED,
    )

    municipalities_gdf = municipality_data()
    cleaned_gdf = add_ags_to_gens(cleaned_gdf, municipalities_gdf)

    return drop_gens_outside_muns(cleaned_gdf)
351
352
353
class OsmBuildingsFiltered(Base):
    """
    Class definition of table openstreetmap.osm_buildings_filtered.

    ORM mapping for filtered OSM buildings; read-only use in this module.
    """
    __tablename__ = "osm_buildings_filtered"
    __table_args__ = {"schema": "openstreetmap"}

    osm_id = Column(BigInteger)  # original OSM object ID
    amenity = Column(String)  # OSM "amenity" tag
    building = Column(String)  # OSM "building" tag
    name = Column(String)  # OSM "name" tag
    geom = Column(Geometry(srid=SRID), index=True)  # building geometry
    area = Column(Float)  # footprint area — presumably m²; confirm
    geom_point = Column(Geometry(srid=SRID), index=True)  # point geometry
    tags = Column(HSTORE)  # raw OSM key/value tags
    id = Column(BigInteger, primary_key=True, index=True)  # surrogate key
370
371
372
@timer_func
def osm_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read OSM buildings data (id, area, point geometry) from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data, indexed by building id.
    """
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsFiltered.id,
            OsmBuildingsFiltered.area,
            OsmBuildingsFiltered.geom_point.label("geom"),
        )

    buildings_gdf = gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    )

    return buildings_gdf.to_crs(to_crs)
397
398
399
@timer_func
def synthetic_buildings(
    to_crs: CRS,
) -> gpd.GeoDataFrame:
    """
    Read synthetic buildings data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing synthetic buildings data.
    """
    with db.session_scope() as session:
        query = session.query(
            OsmBuildingsSynthetic.id,
            OsmBuildingsSynthetic.area,
            OsmBuildingsSynthetic.geom_point.label("geom"),
        )

    return gpd.read_postgis(
        query.statement, query.session.bind, index_col="id"
    ).to_crs(to_crs)
424
425
426
@timer_func
def add_ags_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    municipalities_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Attach the municipality (AGS) ID to each building via a spatial join.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    municipalities_gdf : geopandas.GeoDataFrame
        GeoDataFrame with municipality data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with AGS ID added.
    """
    joined_gdf = buildings_gdf.sjoin(
        municipalities_gdf,
        how="left",
        predicate="intersects",
    )

    # sjoin names the municipality index "index_right"; expose it as "ags".
    return joined_gdf.rename(columns={"index_right": "ags"})
450
451
452
def drop_buildings_outside_muns(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of municipalities.

    Buildings without an AGS ID from the spatial join lie outside every
    municipality boundary and are removed.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with buildings without an AGS ID dropped.
    """
    inside_gdf = buildings_gdf.loc[~buildings_gdf.ags.isna()]

    dropped = len(buildings_gdf) - len(inside_gdf)
    logger.debug(
        f"{dropped} "
        f"({dropped / len(buildings_gdf) * 100:g}%) "
        f"of {len(buildings_gdf)} values are outside of the municipalities "
        "and are therefore dropped."
    )

    return inside_gdf
477
478
479
def egon_building_peak_loads():
    """
    Get IDs of buildings that have an electricity peak load.

    The scenario used is the active scenario whose year is closest to
    the status quo.

    Returns
    -------
    numpy.ndarray
        Sorted unique building IDs (int).
    """
    # use active scenario which is closest to today
    scenario = sorted(SCENARIOS, key=get_scenario_year)[0]

    # NOTE(review): `scenario` comes from local config (not untrusted input),
    # but interpolating it into SQL is fragile — prefer a bound parameter if
    # db.select_dataframe supports one.
    sql = f"""
    SELECT building_id
    FROM demand.egon_building_electricity_peak_loads
    WHERE scenario = '{scenario}'
    """

    return (
        db.select_dataframe(sql).building_id.astype(int).sort_values().unique()
    )
493
494
495
@timer_func
def load_building_data():
    """
    Read buildings from DB
    Tables:

    * `openstreetmap.osm_buildings_filtered` (from OSM)
    * `openstreetmap.osm_buildings_synthetic` (synthetic, created by us)

    Use column `id` for both as it is unique hence you concat both datasets.
    If INCLUDE_SYNTHETIC_BUILDINGS is False synthetic buildings will not be
    loaded.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    """

    municipalities_gdf = municipality_data()

    # Load OSM buildings in the same CRS as the municipalities.
    osm_buildings_gdf = osm_buildings(municipalities_gdf.crs)

    if INCLUDE_SYNTHETIC_BUILDINGS:
        synthetic_buildings_gdf = synthetic_buildings(municipalities_gdf.crs)

        # Concatenate OSM and synthetic buildings; `id` is unique across both.
        buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [
                    osm_buildings_gdf,
                    synthetic_buildings_gdf,
                ]
            ),
            geometry="geom",
            crs=osm_buildings_gdf.crs,
        ).rename(columns={"area": "building_area"})

        # Synthetic IDs may come in as strings; unify the index dtype.
        buildings_gdf.index = buildings_gdf.index.astype(int)

    else:
        buildings_gdf = osm_buildings_gdf.rename(
            columns={"area": "building_area"}
        )

    if ONLY_BUILDINGS_WITH_DEMAND:
        # Restrict to buildings that have an electricity peak load.
        building_ids = egon_building_peak_loads()

        init_len = len(building_ids)

        building_ids = np.intersect1d(
            list(map(int, building_ids)),
            list(map(int, buildings_gdf.index.to_numpy())),
        )

        end_len = len(building_ids)

        logger.debug(
            f"{end_len/init_len * 100: g} % ({end_len} / {init_len}) "
            f"of buildings have peak load."
        )

        buildings_gdf = buildings_gdf.loc[building_ids]

    # Regionalize: attach AGS IDs and drop buildings outside municipalities.
    buildings_ags_gdf = add_ags_to_buildings(buildings_gdf, municipalities_gdf)

    buildings_ags_gdf = drop_buildings_outside_muns(buildings_ags_gdf)

    # Overlay MV grid districts with federal states and map buildings onto
    # the resulting areas (helpers defined elsewhere in this module).
    grid_districts_gdf = grid_districts(EPSG)

    federal_state_gdf = federal_state_data(grid_districts_gdf.crs)

    grid_federal_state_gdf = overlay_grid_districts_with_counties(
        grid_districts_gdf,
        federal_state_gdf,
    )

    buildings_overlay_gdf = add_overlay_id_to_buildings(
        buildings_ags_gdf,
        grid_federal_state_gdf,
    )

    logger.debug("Loaded buildings.")

    buildings_overlay_gdf = drop_buildings_outside_grids(buildings_overlay_gdf)

    # overwrite bus_id with data from new table
    sql = (
        "SELECT building_id, bus_id FROM "
        "boundaries.egon_map_zensus_mvgd_buildings"
    )
    map_building_bus_df = db.select_dataframe(sql)

    # Keep only buildings present in the building-to-bus mapping.
    building_ids = np.intersect1d(
        list(map(int, map_building_bus_df.building_id.unique())),
        list(map(int, buildings_overlay_gdf.index.to_numpy())),
    )

    buildings_within_gdf = buildings_overlay_gdf.loc[building_ids]

    # Replace the overlay-derived bus_id by the mapped one.
    gdf = (
        buildings_within_gdf.reset_index()
        .drop(columns=["bus_id"])
        .merge(
            how="left",
            right=map_building_bus_df,
            left_on="id",
            right_on="building_id",
        )
        .drop(columns=["building_id"])
        .set_index("id")
        .sort_index()
    )

    # The merge can duplicate buildings mapped to several buses; keep the
    # first occurrence per building ID.
    return gdf[~gdf.index.duplicated(keep="first")]
610
611
612
@timer_func
def sort_and_qcut_df(
    df: pd.DataFrame | gpd.GeoDataFrame,
    col: str,
    q: int,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Sort a (Geo)DataFrame ascending by *col* and bin *col* into quantiles.

    The quantile class is stored in a new column ``quant`` with integer
    labels ``0 .. q-1``.

    Parameters
    -----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        (Geo)DataFrame to sort and qcut.
    col : str
        Name of the attribute to sort and qcut the (Geo)DataFrame on.
    q : int
        Number of quantiles.
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        Sorted and qcut (Geo)DataFrame.
    """
    ordered_df = df.sort_values(col, ascending=True)

    quantile_labels = pd.qcut(
        ordered_df[col],
        q=q,
        labels=range(q),
    )

    return ordered_df.assign(quant=quantile_labels)
643
644
645
@timer_func
def allocate_pv(
    q_mastr_gdf: gpd.GeoDataFrame,
    q_buildings_gdf: gpd.GeoDataFrame,
    seed: int,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate the MaStR pv generators to the OSM buildings.

    This will determine a building for each pv generator if there are more
    buildings than generators within a given AGS. Primarily generators are
    distributed with the same quantile as the buildings. Multiple assignment
    is excluded.

    Parameters
    -----------
    q_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded and qcut MaStR data.
    q_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing qcut OSM buildings data.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    rng = default_rng(seed=seed)

    # Initialise the cross-reference columns; they are filled AGS by AGS.
    q_buildings_gdf = q_buildings_gdf.assign(gens_id=np.nan).sort_values(
        by=["ags", "quant"]
    )
    q_mastr_gdf = q_mastr_gdf.assign(building_id=np.nan).sort_values(
        by=["ags", "quant"]
    )

    ags_list = q_buildings_gdf.ags.unique()

    if TEST_RUN:
        # Shortcut for test runs: only handle the first 250 municipalities.
        ags_list = ags_list[:250]

    num_ags = len(ags_list)

    t0 = perf_counter()

    for count, ags in enumerate(ags_list):
        # All buildings and generators within this municipality.
        buildings = q_buildings_gdf.loc[q_buildings_gdf.ags == ags]
        gens = q_mastr_gdf.loc[q_mastr_gdf.ags == ags]

        len_build = len(buildings)
        len_gens = len(gens)

        if len_build < len_gens:
            # More generators than buildings: randomly truncate the
            # generator set to the number of available buildings.
            gens = gens.sample(len_build, random_state=RandomState(seed=seed))
            logger.error(
                f"There are {len_gens} generators and only {len_build}"
                f" buildings in AGS {ags}. {len_gens - len(gens)} "
                "generators were truncated to match the amount of buildings."
            )

            assert len_build == len(gens)

        # Match generators and buildings quantile by quantile.
        for quant in gens.quant.unique():
            q_buildings = buildings.loc[buildings.quant == quant]
            q_gens = gens.loc[gens.quant == quant]

            len_build = len(q_buildings)
            len_gens = len(q_gens)

            if len_build < len_gens:
                # Not enough buildings in this quantile: top up with random
                # buildings from other quantiles of the same AGS.
                delta = len_gens - len_build

                logger.warning(
                    f"There are {len_gens} generators and only {len_build} "
                    f"buildings in AGS {ags} and quantile {quant}. {delta} "
                    f"buildings from AGS {ags} will be added randomly."
                )

                add_buildings = pd.Index(
                    rng.choice(
                        list(set(buildings.index) - set(q_buildings.index)),
                        size=delta,
                        replace=False,
                    )
                )

                chosen_buildings = q_buildings.index.union(add_buildings)

            else:
                # Enough buildings: pick one building per generator at random.
                chosen_buildings = rng.choice(
                    q_buildings.index,
                    size=len_gens,
                    replace=False,
                )

            q_buildings_gdf.loc[chosen_buildings, "gens_id"] = q_gens.index
            # Remove used buildings so they cannot be assigned twice.
            buildings = buildings.drop(chosen_buildings)

        if count % 500 == 0:
            logger.debug(
                f"Allocation of {count / num_ags * 100:g} % of AGS done. "
                f"It took {perf_counter() - t0:g} seconds."
            )

            t0 = perf_counter()

    assigned_buildings = q_buildings_gdf.loc[~q_buildings_gdf.gens_id.isna()]

    # Each generator must be assigned to at most one building.
    assert len(assigned_buildings) == len(assigned_buildings.gens_id.unique())

    # Mirror the assignment onto the generator frame.
    q_mastr_gdf.loc[assigned_buildings.gens_id, "building_id"] = (
        assigned_buildings.index
    )

    assigned_gens = q_mastr_gdf.loc[~q_mastr_gdf.building_id.isna()]

    assert len(assigned_buildings) == len(assigned_gens)

    logger.debug("Allocated status quo generators to buildings.")

    return frame_to_numeric(q_mastr_gdf), frame_to_numeric(q_buildings_gdf)
765
766
767
def frame_to_numeric(
    df: pd.DataFrame | gpd.GeoDataFrame,
) -> pd.DataFrame | gpd.GeoDataFrame:
    """
    Convert all object-dtype columns (and an object-dtype index) of a
    DataFrame to numeric where possible, leaving non-convertible data
    unchanged.

    Note: mutates *df* in place and also returns it.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
    """

    def _to_numeric_or_keep(values):
        # pd.to_numeric(errors="ignore") is deprecated since pandas 2.2 and
        # removed in pandas 3.0; replicate its "return input unchanged on
        # failure" behavior explicitly.
        try:
            return pd.to_numeric(values)
        except (ValueError, TypeError):
            return values

    if str(df.index.dtype) == "object":
        df.index = _to_numeric_or_keep(df.index)

    for col in df.columns:
        if str(df[col].dtype) == "object":
            df[col] = _to_numeric_or_keep(df[col])

    return df
787
788
789
def validate_output(
    desagg_mastr_gdf: pd.DataFrame | gpd.GeoDataFrame,
    desagg_buildings_gdf: pd.DataFrame | gpd.GeoDataFrame,
) -> None:
    """
    Validate output.

    * Validate that there are exactly as many buildings with a pv system as
      there are pv systems with a building
    * Validate that the building IDs with a pv system are the same building
      IDs as assigned to the pv systems
    * Validate that the pv system IDs with a building are the same pv system
      IDs as assigned to the buildings

    Parameters
    -----------
    desagg_mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    desagg_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    gens_with_building = desagg_mastr_gdf.loc[
        ~desagg_mastr_gdf.building_id.isna()
    ]
    buildings_with_gen = desagg_buildings_gdf.loc[
        ~desagg_buildings_gdf.gens_id.isna()
    ]

    # Equal counts of matched generators and matched buildings.
    assert len(gens_with_building) == len(buildings_with_gen)

    # Building IDs referenced by generators == IDs of matched buildings.
    assert (
        np.sort(gens_with_building.building_id.unique())
        == np.sort(buildings_with_gen.index.unique())
    ).all()

    # Generator IDs referenced by buildings == IDs of matched generators.
    assert (
        np.sort(gens_with_building.index.unique())
        == np.sort(buildings_with_gen.gens_id.unique())
    ).all()

    logger.debug("Validated output.")
839
840
841
def drop_unallocated_gens(
    gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop generators which did not get allocated.

    Parameters
    -----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data allocated to building IDs.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with generators dropped which did
        not get allocated.
    """
    total = len(gdf)
    allocated_gdf = gdf.loc[~gdf.building_id.isna()]
    kept = len(allocated_gdf)

    logger.debug(
        f"Dropped {total - kept} "
        f"({((total - kept) / total) * 100:g}%)"
        f" of {total} unallocated rows from MaStR DataFrame."
    )

    return allocated_gdf
868
869
870
@timer_func
def allocate_to_buildings(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Allocate status quo pv rooftop generators to buildings.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing MaStR data with geocoded locations.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with buildings without an
        AGS ID dropped.
    Returns
    -------
    tuple with two geopandas.GeoDataFrame s
        GeoDataFrame containing MaStR data allocated to building IDs.
        GeoDataFrame containing building data allocated to MaStR IDs.
    """
    logger.debug("Starting allocation of status quo.")

    # Classify generators by capacity and buildings by footprint area into
    # the same number of quantiles, then match quantile against quantile.
    quantiled_gens_gdf = sort_and_qcut_df(mastr_gdf, col="capacity", q=Q)
    quantiled_buildings_gdf = sort_and_qcut_df(
        buildings_gdf, col="building_area", q=Q
    )

    desagg_mastr_gdf, desagg_buildings_gdf = allocate_pv(
        quantiled_gens_gdf, quantiled_buildings_gdf, SEED
    )

    validate_output(desagg_mastr_gdf, desagg_buildings_gdf)

    return drop_unallocated_gens(desagg_mastr_gdf), desagg_buildings_gdf
902
903
904
@timer_func
def grid_districts(
    epsg: int,
) -> gpd.GeoDataFrame:
    """
    Load mv grid district geo data from eGo^n Database as
    geopandas.GeoDataFrame.

    Parameters
    -----------
    epsg : int
        EPSG ID to use as CRS.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    """
    districts_gdf = db.select_geodataframe(
        """
        SELECT bus_id, geom
        FROM grid.egon_mv_grid_district
        ORDER BY bus_id
        """,
        index_col="bus_id",
        geom_col="geom",
        epsg=epsg,
    )

    # Normalize the bus_id index to plain ints.
    districts_gdf.index = districts_gdf.index.astype(int)

    logger.debug("Grid districts loaded.")

    return districts_gdf
936
937
938
def scenario_data(
    carrier: str = "solar_rooftop",
    scenario: str = "eGon2035",
) -> pd.DataFrame:
    """
    Get scenario capacity data from eGo^n Database.

    Parameters
    -----------
    carrier : str
        Carrier type to filter table by.
    scenario : str
        Scenario to filter table by.
    Returns
    -------
    pandas.DataFrame
        DataFrame with scenario capacity data in GW.
    """
    with db.session_scope() as session:
        query = session.query(EgonScenarioCapacities).filter(
            EgonScenarioCapacities.carrier == carrier,
            EgonScenarioCapacities.scenario_name == scenario,
        )

    cap_df = pd.read_sql(
        query.statement, query.session.bind, index_col="index"
    )
    cap_df = cap_df.sort_index()

    logger.debug("Scenario capacity data loaded.")

    return cap_df
968
969
970 View Code Duplication
class Vg250Lan(Base):
    """
    Class definition of table boundaries.vg250_lan.

    SQLAlchemy ORM mapping of the VG250 federal state (Land) boundaries
    table; used in this module to read federal state NUTS codes and
    geometries (see ``federal_state_data``).
    """

    __tablename__ = "vg250_lan"
    __table_args__ = {"schema": "boundaries"}

    # Primary key of the table.
    id = Column(BigInteger, primary_key=True, index=True)
    # VG250 administrative attributes — see the BKG VG250 product
    # documentation for the meaning of the individual codes.
    ade = Column(BigInteger)
    gf = Column(BigInteger)
    bsg = Column(BigInteger)
    ars = Column(String)
    ags = Column(String)
    sdv_ars = Column(String)
    gen = Column(String)
    bez = Column(String)
    ibz = Column(BigInteger)
    bem = Column(String)
    nbd = Column(String)
    sn_l = Column(String)
    sn_r = Column(String)
    sn_k = Column(String)
    sn_v1 = Column(String)
    sn_v2 = Column(String)
    sn_g = Column(String)
    fk_s3 = Column(String)
    nuts = Column(String)
    ars_0 = Column(String)
    ags_0 = Column(String)
    wsk = Column(String)
    debkg_id = Column(String)
    rs = Column(String)
    sdv_rs = Column(String)
    rs_0 = Column(String)
    # Federal state geometry in the module-wide EPSG CRS.
    geometry = Column(Geometry(srid=EPSG), index=True)
1006
1007
1008
def federal_state_data(to_crs: CRS) -> gpd.GeoDataFrame:
    """
    Get federal state data from eGo^n Database.

    Parameters
    -----------
    to_crs : pyproj.crs.crs.CRS
        CRS to transform geometries to.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with federal state IDs, NUTS codes and geometries.
    """
    with db.session_scope() as session:
        query = session.query(
            Vg250Lan.id, Vg250Lan.nuts, Vg250Lan.geometry.label("geom")
        )

        federal_state_gdf = gpd.read_postgis(
            query.statement, session.connection(), index_col="id"
        )

    # Reproject to the requested CRS after leaving the session.
    federal_state_gdf = federal_state_gdf.to_crs(to_crs)

    logger.debug("Federal State data loaded.")

    return federal_state_gdf
1032
1033
1034
@timer_func
def overlay_grid_districts_with_counties(
    mv_grid_district_gdf: gpd.GeoDataFrame,
    federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Calculate the intersections of mv grid districts and counties.

    Parameters
    -----------
    mv_grid_district_gdf : gpd.GeoDataFrame
        GeoDataFrame containing mv grid district ID and geo shapes data.
    federal_state_gdf : gpd.GeoDataFrame
        GeoDataFrame with federal state data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with the intersection shapes between counties and
        grid districts.
    """
    logger.debug(
        "Calculating intersection overlay between mv grid districts and "
        "counties. This may take a while..."
    )

    # Bring the federal states into the grid district CRS before
    # intersecting; keep_geom_type drops lower-dimensional artefacts.
    states_gdf = federal_state_gdf.to_crs(mv_grid_district_gdf.crs)

    overlay_gdf = gpd.overlay(
        states_gdf,
        mv_grid_district_gdf.reset_index(),
        how="intersection",
        keep_geom_type=True,
    )

    logger.debug("Done!")

    return overlay_gdf
1067
1068
1069
@timer_func
def add_overlay_id_to_buildings(
    buildings_gdf: gpd.GeoDataFrame,
    grid_federal_state_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Add information about overlay ID to buildings.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    grid_federal_state_gdf : geopandas.GeoDataFrame
        GeoDataFrame with intersection shapes between counties and grid
        districts.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with overlay ID added.
    """
    # Reproject buildings into the overlay CRS before the spatial join.
    reprojected_gdf = buildings_gdf.to_crs(grid_federal_state_gdf.crs)

    joined_gdf = reprojected_gdf.sjoin(
        grid_federal_state_gdf,
        how="left",
        predicate="intersects",
    )

    # The sjoin index column of the right frame becomes the overlay ID.
    result_gdf = joined_gdf.rename(columns={"index_right": "overlay_id"})

    logger.debug("Added overlay ID to OSM buildings.")

    return result_gdf
1101
1102
1103
def drop_buildings_outside_grids(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Drop all buildings outside of grid areas.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data
        with buildings without a bus ID dropped.
    """
    # A missing bus_id means the building lies outside every grid area.
    inside_gdf = buildings_gdf.loc[buildings_gdf.bus_id.notna()]

    n_total = len(buildings_gdf)
    n_dropped = n_total - len(inside_gdf)

    logger.debug(
        f"{n_dropped} "
        f"({n_dropped / n_total * 100:g}%) "
        f"of {n_total} values are outside of the grid areas "
        "and are therefore dropped."
    )

    return inside_gdf
1128
1129
1130
def cap_per_bus_id(
    scenario: str,
) -> pd.DataFrame:
    """
    Get table with total pv rooftop capacity per grid district.

    Parameters
    -----------
    scenario : str
        Scenario name.
    Returns
    -------
    pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid, indexed by
        bus_id.
    """
    # Status-quo scenarios aggregate installed capacities from the power
    # plants table; other scenarios read the scenario generators table.
    if "status" in scenario:
        sources = config.datasets()["solar_rooftop"]["sources"]

        sql = f"""
        SELECT bus_id, SUM(el_capacity) as capacity
        FROM {sources['power_plants']['schema']}.{sources['power_plants']['table']}
        WHERE carrier = 'solar_rooftop'
        AND scenario = '{scenario}'
        GROUP BY bus_id
        """

        return db.select_dataframe(sql, index_col="bus_id")

    targets = config.datasets()["solar_rooftop"]["targets"]

    sql = f"""
        SELECT bus as bus_id, control, p_nom as capacity
        FROM {targets['generators']['schema']}.{targets['generators']['table']}
        WHERE carrier = 'solar_rooftop'
        AND scn_name = '{scenario}'
        """

    generators_df = db.select_dataframe(sql, index_col="bus_id")

    # Slack buses do not represent rooftop generators.
    return generators_df.loc[generators_df.control != "Slack"]
1172
1173
1174
def determine_end_of_life_gens(
    mastr_gdf: gpd.GeoDataFrame,
    scenario_timestamp: pd.Timestamp,
    pv_rooftop_lifetime: pd.Timedelta,
) -> gpd.GeoDataFrame:
    """
    Determine if an old PV system has reached its end of life.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    scenario_timestamp : pandas.Timestamp
        Timestamp at which the scenario takes place.
    pv_rooftop_lifetime : pandas.Timedelta
        Average expected lifetime of PV rooftop systems.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data and info if the system
        has reached its end of life.
    """
    before = mastr_gdf.capacity.sum()

    # Age of every system at the scenario timestamp; anything older than
    # the expected lifetime is flagged as end-of-life.
    age = scenario_timestamp - mastr_gdf.commissioning_date

    mastr_gdf = mastr_gdf.assign(
        age=age,
        end_of_life=pv_rooftop_lifetime < age,
    )

    still_active = ~mastr_gdf.end_of_life
    after = mastr_gdf.loc[still_active].capacity.sum()

    logger.debug(
        f"Determined if pv rooftop systems reached their end of life.\nTotal "
        f"capacity: {before}\nActive capacity: {after}"
    )

    return mastr_gdf
1213
1214
1215
def calculate_max_pv_cap_per_building(
    buildings_gdf: gpd.GeoDataFrame,
    mastr_gdf: gpd.GeoDataFrame,
    pv_cap_per_sq_m: float | int,
    roof_factor: float | int,
) -> gpd.GeoDataFrame:
    """
    Calculate the estimated maximum possible PV capacity per building.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    roof_factor : float, int
        Average for PV usable roof area share.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with estimated maximum PV
        capacity.
    """
    mastr_cols = [
        "capacity",
        "end_of_life",
        "building_id",
        "orientation_uniform",
        "orientation_primary",
        "orientation_primary_angle",
    ]

    # Attach the existing-plant attributes to the buildings they sit on.
    merged_gdf = (
        buildings_gdf.reset_index()
        .rename(columns={"index": "id"})
        .merge(
            mastr_gdf[mastr_cols],
            how="left",
            left_on="id",
            right_on="building_id",
        )
        .set_index("id")
        .drop(columns="building_id")
    )

    # Buildings without an existing system get end_of_life=True so they
    # count as free potential; max_cap scales with the usable roof area.
    return merged_gdf.assign(
        max_cap=merged_gdf.building_area * (roof_factor * pv_cap_per_sq_m),
        end_of_life=merged_gdf.end_of_life.fillna(True).astype(bool),
        bus_id=merged_gdf.bus_id.astype(int),
    )
1267
1268
1269
def calculate_building_load_factor(
    mastr_gdf: gpd.GeoDataFrame,
    buildings_gdf: gpd.GeoDataFrame,
    rounding: int = 4,
) -> gpd.GeoDataFrame:
    """
    Calculate the roof load factor from existing PV systems.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    rounding : int
        Rounding to use for load factor.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data with calculated load
        factor.
    """
    # Only buildings with a known maximum capacity can yield a load factor.
    eligible_gdf = buildings_gdf.loc[
        buildings_gdf["max_cap"].notna(), ["max_cap", "building_area"]
    ].reset_index()

    merged_gdf = mastr_gdf.merge(
        eligible_gdf,
        how="left",
        left_on="building_id",
        right_on="id",
    ).set_index("id")

    # Load factor = installed capacity relative to the roof's maximum.
    load_factor = (merged_gdf.capacity / merged_gdf.max_cap).round(rounding)

    return merged_gdf.assign(load_factor=load_factor)
1300
1301
1302
def get_probability_for_property(
    mastr_gdf: gpd.GeoDataFrame,
    cap_range: tuple[int | float, int | float],
    prop: str,
) -> tuple[np.array, np.array]:
    """
    Calculate the probability of the different options of a property of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_range : tuple(int, int)
        Capacity range of PV plants to look at. The lower bound is
        exclusive, the upper bound inclusive.
    prop : str
        Property to calculate probabilities for. String needs to be in columns
        of mastr_gdf.
    Returns
    -------
    tuple
        numpy.array
            Unique values of property.
        numpy.array
            Probabilties per unique value.
    """
    cap_range_gdf = mastr_gdf.loc[
        (mastr_gdf.capacity > cap_range[0])
        & (mastr_gdf.capacity <= cap_range[1])
    ]

    if prop == "load_factor":
        # Load factors above 1 are implausible and would skew the
        # distribution, so they are excluded.
        cap_range_gdf = cap_range_gdf.loc[cap_range_gdf[prop] <= 1]

    # Count occurrences of each valid value. Series.isnull() is an alias
    # of Series.isna(), so a single missing-value check suffices; values
    # equal to the string "None" are treated as missing as well.
    count = Counter(
        cap_range_gdf[prop].loc[
            ~cap_range_gdf[prop].isna() & ~(cap_range_gdf[prop] == "None")
        ]
    )

    values = np.array(list(count.keys()))
    probabilities = np.fromiter(count.values(), dtype=float)
    # Normalize the counts to probabilities summing to 1.
    probabilities = probabilities / np.sum(probabilities)

    return values, probabilities
1348
1349
1350
@timer_func
def probabilities(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    properties: list[str] | None = None,
) -> dict:
    """
    Calculate the probability of the different options of properties of the
    existing PV plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    properties : list(str)
        List of properties to calculate probabilities for. Strings need to be
        in columns of mastr_gdf.
    Returns
    -------
    dict
        Dictionary with values and probabilities per capacity range.
    """
    if cap_ranges is None:
        # Default MaStR capacity classes in MW (<=30 kW, 30-100 kW, >100 kW).
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]
    if properties is None:
        properties = [
            "orientation_uniform",
            "orientation_primary",
            "orientation_primary_angle",
            "load_factor",
        ]

    prob_dict = {}

    for cap_range in cap_ranges:
        values_dict = {}
        probabilities_dict = {}

        for prop in properties:
            values, probs = get_probability_for_property(
                mastr_gdf,
                cap_range,
                prop,
            )

            values_dict[prop] = values
            probabilities_dict[prop] = probs

        prob_dict[cap_range] = {
            "values": values_dict,
            "probabilities": probabilities_dict,
        }

    return prob_dict
1407
1408
1409
def cap_share_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the share of PV capacity from the total PV capacity within
    capacity ranges.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    """
    if cap_ranges is None:
        # Default MaStR capacity classes in MW (<=30 kW, 30-100 kW, >100 kW).
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    total_cap = mastr_gdf.capacity.sum()

    cap_share_dict = {}

    for lower, upper in cap_ranges:
        # Lower bound exclusive, upper bound inclusive.
        in_range = (mastr_gdf.capacity > lower) & (mastr_gdf.capacity <= upper)

        cap_share_dict[(lower, upper)] = (
            mastr_gdf.loc[in_range].capacity.sum() / total_cap
        )

    return cap_share_dict
1453
1454
1455
def mean_load_factor_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
) -> dict[tuple[int | float, int | float], float]:
    """
    Calculate the mean roof load factor per capacity range from existing PV
    plants.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    Returns
    -------
    dict
        Dictionary with mean roof load factor per capacity range.
    """
    if cap_ranges is None:
        # Default MaStR capacity classes in MW (<=30 kW, 30-100 kW, >100 kW).
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    # Load factors above 1 are implausible and excluded from the mean.
    plausible = mastr_gdf.load_factor <= 1

    load_factor_dict = {}

    for lower, upper in cap_ranges:
        in_range = (mastr_gdf.capacity > lower) & (mastr_gdf.capacity <= upper)

        load_factor_dict[(lower, upper)] = mastr_gdf.loc[
            plausible & in_range
        ].load_factor.mean()

    return load_factor_dict
1493
1494
1495
def building_area_range_per_cap_range(
    mastr_gdf: gpd.GeoDataFrame,
    cap_ranges: list[tuple[int | float, int | float]] | None = None,
    min_building_size: int | float = 10.0,
    upper_quantile: float = 0.95,
    lower_quantile: float = 0.05,
) -> dict[tuple[int | float, int | float], tuple[int | float, int | float]]:
    """
    Estimate normal building area range per capacity range.

    First derives raw per-range area bounds from quantiles of the
    existing plants, then smooths neighboring bounds by averaging them so
    adjacent ranges meet at a common boundary.

    Parameters
    -----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    cap_ranges : list(tuple(int, int))
        List of capacity ranges to distinguish between. The first tuple should
        start with a zero and the last one should end with infinite.
    min_building_size : int, float
        Minimal building size to consider for PV plants.
    upper_quantile : float
        Upper quantile to estimate maximum building size per capacity range.
    lower_quantile : float
        Lower quantile to estimate minimum building size per capacity range.
    Returns
    -------
    dict
        Dictionary with estimated normal building area range per capacity
        range.
    """
    if cap_ranges is None:
        # Default MaStR capacity classes in MW (<=30 kW, 30-100 kW, >100 kW).
        cap_ranges = [
            (0, 30 / 10**3),
            (30 / 10**3, 100 / 10**3),
            (100 / 10**3, float("inf")),
        ]

    n_ranges = len(cap_ranges)

    # Step 1: raw bounds from quantiles of building areas per range.
    raw_range_dict = {}

    for idx, cap_range in enumerate(cap_ranges):
        areas = mastr_gdf.loc[
            (mastr_gdf.capacity > cap_range[0])
            & (mastr_gdf.capacity <= cap_range[1])
        ].building_area

        if idx == 0:
            bounds = (
                min_building_size,
                areas.quantile(upper_quantile),
            )
        elif idx == n_ranges - 1:
            bounds = (
                areas.quantile(lower_quantile),
                float("inf"),
            )
        else:
            bounds = (
                areas.quantile(lower_quantile),
                areas.quantile(upper_quantile),
            )

        raw_range_dict[cap_range] = bounds

    # Step 2: average each inner bound with its neighbor so that the
    # ranges tile the building-area axis without gaps or overlaps.
    bounds_list = list(raw_range_dict.values())

    normed_range_dict = {}

    for idx, (cap_range, (min_area, max_area)) in enumerate(
        raw_range_dict.items()
    ):
        if idx == 0:
            normed = (
                min_area,
                np.mean((bounds_list[idx + 1][0], max_area)),
            )
        elif idx == n_ranges - 1:
            normed = (
                np.mean((bounds_list[idx - 1][1], min_area)),
                max_area,
            )
        else:
            normed = (
                np.mean((bounds_list[idx - 1][1], min_area)),
                np.mean((bounds_list[idx + 1][0], max_area)),
            )

        normed_range_dict[cap_range] = normed

    return normed_range_dict
1582
1583
1584
@timer_func
def desaggregate_pv_in_mv_grid(
    buildings_gdf: gpd.GeoDataFrame,
    pv_cap: float | int,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing buildings within the grid district.
    pv_cap : float, int
        PV capacity to desaggregate.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    bus_id = int(buildings_gdf.bus_id.iat[0])

    rng = default_rng(seed=kwargs["seed"])
    random_state = RandomState(seed=kwargs["seed"])

    results_df = pd.DataFrame(columns=buildings_gdf.columns)

    for cap_range, share in kwargs["cap_share_dict"].items():
        # Capacity to be allocated within this capacity range.
        pv_cap_range = pv_cap * share

        b_area_min, b_area_max = kwargs["building_area_range_dict"][cap_range]

        # Candidate buildings: not yet used and with a roof area typical
        # for this capacity range.
        cap_range_buildings_gdf = buildings_gdf.loc[
            ~buildings_gdf.index.isin(results_df.index)
            & (buildings_gdf.building_area > b_area_min)
            & (buildings_gdf.building_area <= b_area_max)
        ]

        mean_load_factor = kwargs["load_factor_dict"][cap_range]
        cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
            mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
            load_factor=np.nan,
            capacity=np.nan,
        )

        total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        if total_mean_cap == 0:
            # Fallback 1: no candidate roofs — use all unused buildings.
            logger.warning(
                f"There are no matching roof for capacity range {cap_range} "
                f"kW in grid {bus_id}. Using all buildings as fallback."
            )

            cap_range_buildings_gdf = buildings_gdf.loc[
                ~buildings_gdf.index.isin(results_df.index)
            ]

            if len(cap_range_buildings_gdf) == 0:
                # Fallback 2: every building is used — allow buildings to
                # host systems of several capacity ranges.
                logger.warning(
                    "There are no roofes available for capacity range "
                    f"{cap_range} kW in grid {bus_id}. Allowing dual use."
                )
                cap_range_buildings_gdf = buildings_gdf.copy()

            cap_range_buildings_gdf = cap_range_buildings_gdf.assign(
                mean_cap=cap_range_buildings_gdf.max_cap * mean_load_factor,
                load_factor=np.nan,
                capacity=np.nan,
            )

            total_mean_cap = cap_range_buildings_gdf.mean_cap.sum()

        elif total_mean_cap < pv_cap_range:
            logger.warning(
                f"Average roof utilization of the roof area in grid {bus_id} "
                f"and capacity range {cap_range} kW is not sufficient. The "
                "roof utilization will be above average."
            )

        # Sample at least one building; sample a larger fraction when the
        # mean capacity of all candidates would not reach the target.
        frac = max(
            pv_cap_range / total_mean_cap,
            1 / len(cap_range_buildings_gdf),
        )

        samples_gdf = cap_range_buildings_gdf.sample(
            frac=min(1, frac),
            random_state=random_state,
        )

        cap_range_dict = kwargs["prob_dict"][cap_range]

        values_dict = cap_range_dict["values"]
        p_dict = cap_range_dict["probabilities"]

        # Draw load factors from the empirical distribution of existing
        # systems in this capacity range.
        load_factors = rng.choice(
            a=values_dict["load_factor"],
            size=len(samples_gdf),
            p=p_dict["load_factor"],
        )

        samples_gdf = samples_gdf.assign(
            load_factor=load_factors,
            capacity=(
                samples_gdf.building_area
                * load_factors
                * kwargs["pv_cap_per_sq_m"]
            ).clip(lower=0.4),  # floor of 0.4 per system (kW per log texts)
        )

        # Rescale the sampled systems so the range target is met exactly.
        missing_factor = pv_cap_range / samples_gdf.capacity.sum()

        samples_gdf = samples_gdf.assign(
            capacity=(samples_gdf.capacity * missing_factor),
            load_factor=(samples_gdf.load_factor * missing_factor),
        )

        assert np.isclose(
            samples_gdf.capacity.sum(),
            pv_cap_range,
            rtol=1e-03,
        ), f"{samples_gdf.capacity.sum()} != {pv_cap_range}"

        results_df = pd.concat(
            [
                results_df,
                samples_gdf,
            ],
        )

    # Final rescale so the grid-wide target is met exactly.
    total_missing_factor = pv_cap / results_df.capacity.sum()

    results_df = results_df.assign(
        capacity=(results_df.capacity * total_missing_factor),
    )

    assert np.isclose(
        results_df.capacity.sum(),
        pv_cap,
        rtol=1e-03,
    ), f"{results_df.capacity.sum()} != {pv_cap}"

    # Use the CRS of the input GeoDataFrame (identical to the samples'
    # CRS): the loop-local samples_gdf is undefined when cap_share_dict
    # is empty, which previously risked a NameError.
    return gpd.GeoDataFrame(
        results_df,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1746
1747
1748
@timer_func
def desaggregate_pv(
    buildings_gdf: gpd.GeoDataFrame,
    cap_df: pd.DataFrame,
    **kwargs,
) -> gpd.GeoDataFrame:
    """
    Desaggregate PV capacity on buildings within a given grid district.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    cap_df : pandas.DataFrame
        DataFrame with total rooftop capacity per mv grid.
    Other Parameters
    -----------
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    cap_share_dict : dict
        Dictionary with share of PV capacity from the total PV capacity within
        capacity ranges.
    building_area_range_dict : dict
        Dictionary with estimated normal building area range per capacity
        range.
    load_factor_dict : dict
        Dictionary with mean roof load factor per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.
    pv_cap_per_sq_m : float, int
        Average expected, installable PV capacity per square meter.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    # Start with all buildings that already carry an active system; newly
    # allocated systems are appended per grid below.
    allocated_buildings_gdf = buildings_gdf.loc[~buildings_gdf.end_of_life]

    building_bus_ids = set(buildings_gdf.bus_id)
    cap_bus_ids = set(cap_df.index)

    logger.debug(
        f"Bus IDs from buildings: {len(building_bus_ids)}\nBus IDs from "
        f"capacity: {len(cap_bus_ids)}"
    )

    if len(building_bus_ids) > len(cap_bus_ids):
        missing = building_bus_ids - cap_bus_ids
    else:
        missing = cap_bus_ids - building_bus_ids

    logger.debug(str(missing))

    # Only grids present in both datasets can be processed.
    bus_ids = np.intersect1d(list(building_bus_ids), list(cap_bus_ids))

    for bus_id in bus_ids:
        buildings_grid_gdf = buildings_gdf.loc[buildings_gdf.bus_id == bus_id]

        pv_installed_gdf = buildings_grid_gdf.loc[
            ~buildings_grid_gdf.end_of_life
        ]

        pv_installed = pv_installed_gdf.capacity.sum()

        # Buildings without an active system are potential hosts.
        pot_buildings_gdf = buildings_grid_gdf.drop(
            index=pv_installed_gdf.index
        )

        if len(pot_buildings_gdf) == 0:
            logger.error(
                f"In grid {bus_id} there are no potential buildings to "
                f"allocate PV capacity to. The grid is skipped. This message "
                f"should only appear doing test runs with few buildings."
            )

            continue

        pv_target = cap_df.at[bus_id, "capacity"]

        logger.debug(f"pv_target: {pv_target}")

        pv_missing = pv_target - pv_installed

        if pv_missing <= 0:
            logger.warning(
                f"In grid {bus_id} there is more PV installed "
                f"({pv_installed: g} kW) in status Quo than allocated within "
                f"the scenario ({pv_target: g} kW). "
                f"No new generators are added."
            )

            continue

        if pot_buildings_gdf.max_cap.sum() < pv_missing:
            logger.error(
                f"In grid {bus_id} there is less PV potential ("
                f"{pot_buildings_gdf.max_cap.sum():g} MW) than allocated PV "
                f"capacity ({pv_missing:g} MW). The average roof utilization "
                f"will be very high."
            )

        gdf = desaggregate_pv_in_mv_grid(
            buildings_gdf=pot_buildings_gdf,
            pv_cap=pv_missing,
            **kwargs,
        )

        logger.debug(f"New cap in grid {bus_id}: {gdf.capacity.sum()}")
        logger.debug(f"Installed cap in grid {bus_id}: {pv_installed}")
        logger.debug(
            f"Total cap in grid {bus_id}: {gdf.capacity.sum() + pv_installed}"
        )

        if not np.isclose(
            gdf.capacity.sum() + pv_installed, pv_target, rtol=1e-3
        ):
            logger.warning(
                f"The desired capacity and actual capacity in grid {bus_id} "
                f"differ.\n"
                f"Desired cap: {pv_target}\nActual cap: "
                f"{gdf.capacity.sum() + pv_installed}"
            )

        pre_cap = allocated_buildings_gdf.capacity.sum()
        new_cap = gdf.capacity.sum()

        allocated_buildings_gdf = pd.concat(
            [
                allocated_buildings_gdf,
                gdf,
            ]
        )

        total_cap = allocated_buildings_gdf.capacity.sum()

        assert np.isclose(pre_cap + new_cap, total_cap)

    logger.debug("Desaggregated scenario.")
    logger.debug(f"Scenario capacity: {cap_df.capacity.sum(): g}")
    logger.debug(
        f"Generator capacity: " f"{allocated_buildings_gdf.capacity.sum(): g}"
    )

    # Use the CRS of the input buildings GeoDataFrame: the loop-local
    # `gdf` is undefined when every grid was skipped via `continue`,
    # which previously risked a NameError.
    return gpd.GeoDataFrame(
        allocated_buildings_gdf,
        crs=buildings_gdf.crs,
        geometry="geom",
    )
1899
1900
1901
@timer_func
def add_buildings_meta_data(
    buildings_gdf: gpd.GeoDataFrame,
    prob_dict: dict,
    seed: int,
) -> gpd.GeoDataFrame:
    """
    Randomly add additional metadata to desaggregated PV plants.

    For every capacity range, columns that are still empty (NaN or the
    literal string ``"None"``) are filled by sampling from the empirical
    value distribution of that range.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    prob_dict : dict
        Dictionary with values and probabilities per capacity range.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with desaggregated PV
        plants.
    """
    rng = default_rng(seed=seed)

    # Promote the index to an explicit building_id column.
    buildings_gdf = buildings_gdf.reset_index().rename(
        columns={
            "index": "building_id",
        }
    )

    for (cap_min, cap_max), range_probs in prob_dict.items():
        # Buildings whose PV capacity falls into this capacity range.
        in_range = buildings_gdf.loc[
            (buildings_gdf.capacity >= cap_min)
            & (buildings_gdf.capacity < cap_max)
        ]

        for col, choices in range_probs["values"].items():
            # load_factor is not sampled here.
            if col == "load_factor":
                continue

            # Rows still missing a value for this column.
            missing = in_range.loc[
                in_range[col].isna()
                | in_range[col].isnull()
                | (in_range[col] == "None")
            ]

            sampled = rng.choice(
                a=choices,
                size=len(missing),
                p=range_probs["probabilities"][col],
            )

            buildings_gdf.loc[missing.index, col] = sampled

    return buildings_gdf
1956
1957
1958
def add_commissioning_date(
    buildings_gdf: gpd.GeoDataFrame,
    start: pd.Timestamp,
    end: pd.Timestamp,
    seed: int,
):
    """
    Randomly and linear add start-up date to new pv generators.

    Each generator draws one calendar day, uniformly distributed over the
    closed interval [*start*, *end*].

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    start : pandas.Timestamp
        Minimum Timestamp to use.
    end : pandas.Timestamp
        Maximum Timestamp to use.
    seed : int
        Seed to use for random operations with NumPy and pandas.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with start-up date added.
    """
    rng = default_rng(seed=seed)

    # One candidate date per day within the interval.
    candidate_days = pd.date_range(start=start, end=end, freq="1D")

    drawn_dates = rng.choice(candidate_days, size=len(buildings_gdf))

    return buildings_gdf.assign(commissioning_date=drawn_dates)
1989
1990
1991
@timer_func
def allocate_scenarios(
    mastr_gdf: gpd.GeoDataFrame,
    valid_buildings_gdf: gpd.GeoDataFrame,
    last_scenario_gdf: gpd.GeoDataFrame,
    scenario: str,
):
    """
    Desaggregate and allocate scenario pv rooftop ramp-ups onto buildings.

    Parameters
    ----------
    mastr_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing geocoded MaStR data.
    valid_buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data.
    last_scenario_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings matched with pv generators from
        temporally preceding scenario.
    scenario : str
        Scenario to desaggregate and allocate.

    Returns
    -------
    tuple or None
        geopandas.GeoDataFrame
            GeoDataFrame containing OSM buildings matched with pv generators.
        pandas.DataFrame
            DataFrame containing pv rooftop capacity per grid id.
        ``None`` is returned when the scenario holds no PV rooftop capacity.
    """
    # Target PV rooftop capacity per MV grid district for this scenario.
    cap_per_bus_id_df = cap_per_bus_id(scenario)

    if cap_per_bus_id_df.empty:
        print(f"No PV rooftop in scenario {scenario}")
        # NOTE(review): this returns None while the regular path returns a
        # 2-tuple; callers that unpack the result will raise a TypeError for
        # empty scenarios — confirm callers guard against this case.
        return

    logger.debug(
        f"cap_per_bus_id_df total capacity: {cap_per_bus_id_df.capacity.sum()}"
    )

    # Retire generators from the preceding scenario whose lifetime has
    # expired by this scenario's timestamp.
    last_scenario_gdf = determine_end_of_life_gens(
        last_scenario_gdf,
        SCENARIO_TIMESTAMP[scenario],
        PV_ROOFTOP_LIFETIME,
    )

    # Upper bound of installable PV capacity per building from roof area.
    buildings_gdf = calculate_max_pv_cap_per_building(
        valid_buildings_gdf,
        last_scenario_gdf,
        PV_CAP_PER_SQ_M,
        ROOF_FACTOR,
    )

    # Ratio of installed to maximum possible capacity per building.
    mastr_gdf = calculate_building_load_factor(
        mastr_gdf,
        buildings_gdf,
    )

    # Empirical distributions derived from MaStR, keyed by capacity range.
    probabilities_dict = probabilities(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
    )

    cap_share_dict = cap_share_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
    )

    load_factor_dict = mean_load_factor_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
    )

    building_area_range_dict = building_area_range_per_cap_range(
        mastr_gdf,
        cap_ranges=CAP_RANGES,
        min_building_size=MIN_BUILDING_SIZE,
        upper_quantile=UPPER_QUANTILE,
        lower_quantile=LOWER_QUANTILE,
    )

    # Distribute the scenario capacity targets onto individual buildings.
    allocated_buildings_gdf = desaggregate_pv(
        buildings_gdf=buildings_gdf,
        cap_df=cap_per_bus_id_df,
        prob_dict=probabilities_dict,
        cap_share_dict=cap_share_dict,
        building_area_range_dict=building_area_range_dict,
        load_factor_dict=load_factor_dict,
        seed=SEED,
        pv_cap_per_sq_m=PV_CAP_PER_SQ_M,
    )

    allocated_buildings_gdf = allocated_buildings_gdf.assign(scenario=scenario)

    # Sample the remaining metadata columns and coerce to numeric dtypes
    # where possible.
    meta_buildings_gdf = frame_to_numeric(
        add_buildings_meta_data(
            allocated_buildings_gdf,
            probabilities_dict,
            SEED,
        )
    )

    # New units get commissioning dates between the last commissioning of
    # the preceding scenario and this scenario's timestamp.
    return (
        add_commissioning_date(
            meta_buildings_gdf,
            start=last_scenario_gdf.commissioning_date.max(),
            end=SCENARIO_TIMESTAMP[scenario],
            seed=SEED,
        ),
        cap_per_bus_id_df,
    )
2100
2101
2102
class EgonPowerPlantPvRoofBuilding(Base):
    """
    Class definition of table supply.egon_power_plants_pv_roof_building.

    One row per allocated PV rooftop generator and scenario.
    """

    __tablename__ = "egon_power_plants_pv_roof_building"
    __table_args__ = {"schema": "supply"}

    index = Column(Integer, primary_key=True, index=True)  # surrogate key
    scenario = Column(String)  # scenario name, e.g. "status2023"
    bus_id = Column(Integer, nullable=True)  # MV grid district bus
    building_id = Column(Integer)  # id of the (OSM/synthetic) building
    gens_id = Column(String, nullable=True)  # presumably MaStR unit id — TODO confirm
    capacity = Column(Float)  # installed PV capacity
    orientation_uniform = Column(Float)
    orientation_primary = Column(String)
    orientation_primary_angle = Column(String)
    voltage_level = Column(Integer)  # grid voltage level (1–7)
    weather_cell_id = Column(Integer)  # ERA5 weather cell id
2121
2122
2123
def add_metadata():
    """
    Create and submit OEP metadata for the PV rooftop scenario table.

    Builds an oemetadata dictionary for
    ``supply.egon_power_plants_pv_roof_building``, validates/renders it with
    the matching OEP dialect and attaches it to the table as a comment.
    """
    schema = "supply"
    table = "egon_power_plants_pv_roof_building"
    name = f"{schema}.{table}"
    # Zenodo deposit ids of the referenced source datasets.
    deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"]
    deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][
        "zenodo"
    ]["deposit_id"]

    # Same contributor, once for the metadata and once for the dataset.
    contris = contributors(["kh", "kh"])

    contris[0]["date"] = "2023-03-16"

    contris[0]["object"] = "metadata"
    contris[1]["object"] = "dataset"

    contris[0]["comment"] = "Add metadata to dataset."
    contris[1]["comment"] = "Add workflow to generate dataset."

    # oemetadata dictionary; keys follow the OEP metadata specification.
    meta = {
        "name": name,
        "title": "eGon power plants rooftop solar",
        "id": "WILL_BE_SET_AT_PUBLICATION",
        "description": (
            "eGon power plants rooftop solar systems allocated to buildings"
        ),
        "language": "en-US",
        "keywords": ["photovoltaik", "solar", "pv", "mastr", "status quo"],
        "publicationDate": datetime.date.today().isoformat(),
        "context": context(),
        "spatial": {
            "location": "none",
            "extent": "Germany",
            "resolution": "building",
        },
        "temporal": {
            "referenceDate": (
                config.datasets()["mastr_new"]["egon2021_date_max"].split(" ")[
                    0
                ]
            ),
            "timeseries": {},
        },
        "sources": [
            {
                "title": "Data bundle for egon-data",
                "description": (
                    "Data bundle for egon-data: A transparent and "
                    "reproducible data processing pipeline for energy "
                    "system modeling"
                ),
                "path": (
                    "https://zenodo.org/record/"
                    f"{deposit_id_data_bundle}#.Y_dWM4CZMVM"
                ),
                "licenses": [license_dedl(attribution="© Cußmann, Ilka")],
            },
            {
                "title": ("open-MaStR power unit registry for eGo^n project"),
                "description": (
                    "Data from Marktstammdatenregister (MaStR) data using "
                    "the data dump from 2022-11-17 for eGon-data."
                ),
                "path": (
                    f"https://zenodo.org/record/{deposit_id_mastr}"
                ),
                "licenses": [license_dedl(attribution="© Amme, Jonathan")],
            },
            sources()["openstreetmap"],
            sources()["era5"],
            sources()["vg250"],
            sources()["egon-data"],
        ],
        "licenses": [license_odbl("© eGon development team")],
        "contributors": contris,
        "resources": [
            {
                "profile": "tabular-data-resource",
                "name": name,
                "path": "None",
                "format": "PostgreSQL",
                "encoding": "UTF-8",
                "schema": {
                    # Column names/types read straight from the live table.
                    "fields": generate_resource_fields_from_db_table(
                        schema,
                        table,
                    ),
                    "primaryKey": "index",
                },
                "dialect": {"delimiter": "", "decimalSeparator": ""},
            }
        ],
        "review": {"path": "", "badge": ""},
        "metaMetadata": meta_metadata(),
        "_comment": {
            "metadata": (
                "Metadata documentation and explanation (https://github."
                "com/OpenEnergyPlatform/oemetadata/blob/master/metadata/"
                "v141/metadata_key_description.md)"
            ),
            "dates": (
                "Dates and time must follow the ISO8601 including time "
                "zone (YYYY-MM-DD or YYYY-MM-DDThh:mm:ss±hh)"
            ),
            "units": "Use a space between numbers and units (100 m)",
            "languages": (
                "Languages must follow the IETF (BCP47) format (en-GB, "
                "en-US, de-DE)"
            ),
            "licenses": (
                "License name must follow the SPDX License List "
                "(https://spdx.org/licenses/)"
            ),
            "review": (
                "Following the OEP Data Review (https://github.com/"
                "OpenEnergyPlatform/data-preprocessing/wiki)"
            ),
            "none": "If not applicable use (none)",
        },
    }

    # Validate and normalize against the oemetadata version used project-wide.
    dialect = get_dialect(f"oep-v{meta_metadata()['metadataVersion'][4:7]}")()

    meta = dialect.compile_and_render(dialect.parse(json.dumps(meta)))

    # Store the rendered metadata as the table's SQL comment.
    db.submit_comment(
        f"'{json.dumps(meta)}'",
        schema,
        table,
    )
2253
2254
2255
def create_scenario_table(buildings_gdf):
    """Create mapping table pv_unit <-> building for scenario

    Drops and recreates ``supply.egon_power_plants_pv_roof_building``,
    writes the export columns of *buildings_gdf* into it and attaches the
    table metadata.

    Parameters
    ----------
    buildings_gdf : geopandas.GeoDataFrame
        Buildings with allocated PV generators for all scenarios.
    """
    # NOTE(review): drop/create bind to the module-level `engine` while the
    # insert below opens a fresh `db.engine()` — presumably the same
    # database; confirm and unify.
    EgonPowerPlantPvRoofBuilding.__table__.drop(bind=engine, checkfirst=True)
    EgonPowerPlantPvRoofBuilding.__table__.create(bind=engine, checkfirst=True)

    buildings_gdf[COLS_TO_EXPORT].reset_index().to_sql(
        name=EgonPowerPlantPvRoofBuilding.__table__.name,
        schema=EgonPowerPlantPvRoofBuilding.__table__.schema,
        con=db.engine(),
        if_exists="append",
        index=False,
    )

    add_metadata()
2269
2270
2271
def add_weather_cell_id(buildings_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Attach the weather cell id to every building.

    Buildings are first mapped to their zensus cell, which is then mapped
    to its weather cell. Raises a ValueError if any building ends up
    without a weather cell id.
    """
    building_to_zensus_sql = """
    SELECT building_id, zensus_population_id
    FROM boundaries.egon_map_zensus_mvgd_buildings
    """

    zensus_to_weather_sql = """
    SELECT zensus_population_id, w_id as weather_cell_id
    FROM boundaries.egon_map_zensus_weather_cell
    """

    # building -> zensus cell
    buildings_gdf = buildings_gdf.merge(
        right=db.select_dataframe(building_to_zensus_sql).drop_duplicates(
            subset="building_id"
        ),
        how="left",
        on="building_id",
    )

    # zensus cell -> weather cell
    buildings_gdf = buildings_gdf.merge(
        right=db.select_dataframe(zensus_to_weather_sql).drop_duplicates(
            subset="zensus_population_id"
        ),
        how="left",
        on="zensus_population_id",
    )

    nan_mask = buildings_gdf.weather_cell_id.isna()

    if nan_mask.any():
        missing = buildings_gdf.loc[nan_mask, "building_id"].tolist()

        raise ValueError(
            f"Following buildings don't have a weather cell id: {missing}"
        )

    return buildings_gdf
2306
2307
2308
def add_bus_ids_sq(
    buildings_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """Add bus ids for status_quo units

    Spatially joins status-quo generators with the MV grid districts and
    stores the matched district index as ``bus_id``.

    Parameters
    -----------
    buildings_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing OSM buildings data with desaggregated PV
        plants.
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing OSM building data with bus_id per
        generator.
    """
    grid_districts_gdf = grid_districts(EPSG)

    # NOTE(review): pv_rooftop_to_buildings tags status-quo rows with the
    # scenario name "status2023" (and other "status*" snapshots), so this
    # literal may never match any row — confirm whether the mask should be
    # buildings_gdf.scenario.str.startswith("status").
    mask = buildings_gdf.scenario == "status_quo"

    buildings_gdf.loc[mask, "bus_id"] = (
        buildings_gdf.loc[mask]
        .sjoin(grid_districts_gdf, how="left")
        .index_right
    )

    return buildings_gdf
2335
2336
2337 View Code Duplication
def infer_voltage_level(
    units_gdf: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Infer nan values in voltage level derived from generator capacity to
    the power plants.

    Parameters
    -----------
    units_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing units with voltage levels from MaStR.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame containing units all having assigned a voltage level;
        the boolean column ``voltage_level_inferred`` marks rows whose
        level was derived from capacity rather than taken from MaStR.
    """

    def _voltage_level(p: float) -> int:
        # Capacity thresholds map to decreasing level numbers
        # (7 for the smallest units down to 1 for the largest).
        if p <= 0.1:
            return 7
        if p <= 0.2:
            return 6
        if p <= 5.5:
            return 5
        if p <= 20:
            return 4
        if p <= 120:
            return 3
        return 1

    units_gdf["voltage_level_inferred"] = False
    mask = units_gdf.voltage_level.isna()
    units_gdf.loc[mask, "voltage_level_inferred"] = True
    # Only fill the missing levels; values present from MaStR are kept.
    units_gdf.loc[mask, "voltage_level"] = units_gdf.loc[mask].capacity.apply(
        _voltage_level
    )

    return units_gdf
2375
2376
2377
def pv_rooftop_to_buildings():
    """Main script, executed as task

    Allocates MaStR PV rooftop units to buildings for the status-quo
    scenario, derives the remaining scenarios (earlier status snapshots by
    filtering on commissioning date, future scenarios by desaggregating
    capacity targets), then exports the combined result.
    """

    mastr_gdf = load_mastr_data()

    status_quo = "status2023"

    # Cut-off date: only units commissioned up to the status-quo date count.
    ts = pd.Timestamp(
        config.datasets()["mastr_new"][f"{status_quo}_date_max"], tz="UTC"
    )

    mastr_gdf = mastr_gdf.loc[
        mastr_gdf.commissioning_date <= ts
    ]

    buildings_gdf = load_building_data()

    desagg_mastr_gdf, desagg_buildings_gdf = allocate_to_buildings(
        mastr_gdf, buildings_gdf
    )

    # Status-quo result, re-shaped for export ("geometry" -> "geom").
    all_buildings_gdf = (
        desagg_mastr_gdf.assign(scenario=status_quo)
        .reset_index()
        .rename(columns={"geometry": "geom"})
        .set_geometry("geom")
    )

    scenario_buildings_gdf = all_buildings_gdf.copy()

    cap_per_bus_id_df = pd.DataFrame()

    for scenario in SCENARIOS:
        if scenario == status_quo:
            # Already handled above.
            continue
        elif "status" in scenario:
            # Earlier status snapshot: restrict the status-quo result to
            # units commissioned before that snapshot's cut-off date.
            ts = pd.Timestamp(
                config.datasets()["mastr_new"][f"{scenario}_date_max"], tz="UTC"
            )

            scenario_buildings_gdf = scenario_buildings_gdf.loc[
                scenario_buildings_gdf.commissioning_date <= ts
            ]

        else:
            logger.debug(f"Desaggregating scenario {scenario}.")

            # NOTE(review): allocate_scenarios returns None for scenarios
            # without PV rooftop capacity; this unpacking would then raise
            # a TypeError — confirm that cannot happen here.
            (
                scenario_buildings_gdf,
                cap_per_bus_id_scenario_df,
            ) = allocate_scenarios(  # noqa: F841
                desagg_mastr_gdf,
                desagg_buildings_gdf,
                scenario_buildings_gdf,
                scenario,
            )

            cap_per_bus_id_df = pd.concat(
                [cap_per_bus_id_df, cap_per_bus_id_scenario_df]
            )

        # Accumulate every scenario's buildings into one frame.
        all_buildings_gdf = gpd.GeoDataFrame(
            pd.concat(
                [all_buildings_gdf, scenario_buildings_gdf], ignore_index=True
            ),
            crs=scenario_buildings_gdf.crs,
            geometry="geom",
        )

    # add weather cell
    all_buildings_gdf = add_weather_cell_id(all_buildings_gdf)

    # add bus IDs for status quo scenario
    all_buildings_gdf = add_bus_ids_sq(all_buildings_gdf)

    # export scenario
    create_scenario_table(infer_voltage_level(all_buildings_gdf))
2454