Passed
Pull Request — dev (#1355)
by
unknown
02:50
created

area_grouping()   C

Complexity

Conditions 7

Size

Total Lines 141
Code Lines 59

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 59
dl 0
loc 141
rs 6.9418
c 0
b 0
f 0
cc 7
nop 4

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
3
# This script is part of eGon-data.
4
5
# license text - to be added.
6
7
"""
8
Central module containing all code creating with district heating areas.
9
10
This module obtains the information from the census tables and the heat demand
11
densities, demarcates so the current and future district heating areas. In the
12
end it saves them in the database.
13
"""
14
import datetime
15
import json
16
import os
17
18
# for metadata creation
19
import time
20
21
from geoalchemy2.types import Geometry
22
from matplotlib import pyplot as plt
23
from shapely.geometry.multipolygon import MultiPolygon
24
from shapely.geometry.polygon import Polygon
25
26
# packages for ORM class definition
27
from sqlalchemy import Column, Float, ForeignKey, Integer, Sequence, String
28
from sqlalchemy.ext.declarative import declarative_base
29
import geopandas as gpd
30
import pandas as pd
31
32
from egon.data import config, db
33
from egon.data.datasets import Dataset
34
from egon.data.datasets.district_heating_areas.plot import (
35
    plot_heat_density_sorted,
36
)
37
from egon.data.datasets.scenario_parameters import (
38
    EgonScenario,
39
    get_sector_parameters,
40
)
41
from egon.data.metadata import context, license_ccby, meta_metadata, sources
42
43
# import time
44
45
46
# class for airflow task management (and version control)
47
class DistrictHeatingAreas(Dataset):
48
    """
49
    Create district heating grids for all scenarios
50
51
    This dataset creates district heating grids for each scenario based on a defined
52
    district heating share, annual heat demands calcultaed within
53
    :py:class:`HeatDemandImport <egon.data.datasets.heat_demand.HeatDemandImport>`
54
    and information on existing heating grids from census :py:class:`ZensusMiscellaneous <egon.data.datasets.zensus.ZensusMiscellaneous>`
55
56
    First the tables are created using :py:func:`create_tables`. Afterwards, the
57
    distict heating grids for each scenario are created and inserted into the database
58
    by applying the function :py:func:`district_heating_areas`
59
60
61
    *Dependencies*
62
      * :py:class:`HeatDemandImport <egon.data.datasets.heat_demand.HeatDemandImport>`
63
      * :py:class:`ZensusMiscellaneous <egon.data.datasets.zensus.ZensusMiscellaneous>`
64
      * :py:class:`ScenarioParameters <egon.data.datasets.scenario_parameters.ScenarioParameters>`
65
66
    *Resulting tables*
67
      * :py:class:`demand.egon_map_zensus_district_heating_areas <egon.data.datasets.district_heating_areas.MapZensusDistrictHeatingAreas>`
68
        is created and filled
69
      * :py:class:`demand.egon_district_heating_areas <egon.data.datasets.district_heating_areas.EgonDistrictHeatingAreas>` is created and filled
70
71
    """
72
73
    #:
74
    name: str = "district-heating-areas"
75
    #:
76
    version: str = "0.0.3"
77
78
    def __init__(self, dependencies):
79
        super().__init__(
80
            name=self.name,
81
            # version=self.target_files + "_0.0",
82
            version=self.version,  # maybe rethink the naming
83
            dependencies=dependencies,
84
            tasks=(create_tables, demarcation),
85
        )
86
87
88
Base = declarative_base()
89
90
91
# definition of classes for saving data in the database
92
class MapZensusDistrictHeatingAreas(Base):
93
    __tablename__ = "egon_map_zensus_district_heating_areas"
94
    __table_args__ = {"schema": "demand"}
95
    id = Column(
96
        Integer,
97
        Sequence("map_zensus_district_heating_areas_seq", schema="demand"),
98
        server_default=Sequence(
99
            "map_zensus_district_heating_areas_seq", schema="demand"
100
        ).next_value(),
101
        primary_key=True,
102
    )
103
    area_id = Column(Integer)
104
    scenario = Column(String, ForeignKey(EgonScenario.name))
105
    zensus_population_id = Column(Integer)
106
107
108
class EgonDistrictHeatingAreas(Base):
109
    __tablename__ = "egon_district_heating_areas"
110
    __table_args__ = {"schema": "demand"}
111
    id = Column(
112
        Integer,
113
        Sequence("district_heating_areas_seq", schema="demand"),
114
        server_default=Sequence(
115
            "district_heating_areas_seq", schema="demand"
116
        ).next_value(),
117
        primary_key=True,
118
    )
119
    area_id = Column(Integer)
120
    scenario = Column(String, ForeignKey(EgonScenario.name))
121
    geom_polygon = Column(Geometry("MULTIPOLYGON", 3035))
122
    residential_and_service_demand = Column(Float)
123
124
125
def create_tables():
126
    """Create tables for district heating areas
127
128
    Returns
129
    -------
130
        None
131
    """
132
133
    # Create schema
134
    db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;")
135
136
    # Drop tables
137
    db.execute_sql(
138
        """DROP TABLE IF EXISTS
139
            demand.egon_district_heating_areas CASCADE;"""
140
    )
141
142
    db.execute_sql(
143
        """DROP TABLE IF EXISTS
144
            demand.egon_map_zensus_district_heating_areas CASCADE;"""
145
    )
146
147
    db.execute_sql(
148
        """DROP TABLE IF EXISTS
149
            demand.district_heating_areas CASCADE;"""
150
    )
151
152
    db.execute_sql(
153
        """DROP TABLE IF EXISTS
154
            demand.map_zensus_district_heating_areas CASCADE;"""
155
    )
156
157
    # Drop sequences
158
    db.execute_sql(
159
        """DROP SEQUENCE IF EXISTS
160
            demand.district_heating_areas_seq CASCADE;"""
161
    )
162
163
    db.execute_sql(
164
        """DROP SEQUENCE IF EXISTS
165
            demand.egon_map_zensus_district_heating_areas_seq CASCADE;"""
166
    )
167
168
    engine = db.engine()
169
    EgonDistrictHeatingAreas.__table__.create(bind=engine, checkfirst=True)
170
    MapZensusDistrictHeatingAreas.__table__.create(
171
        bind=engine, checkfirst=True
172
    )
173
174
175
# Methods used are explained here:
176
# https://geopandas.org/docs/user_guide/geometric_manipulations.html
177
178
179
def load_census_data(minimum_connection_rate=0.3):
180
    """
181
    Load the heating type information from the census database table.
182
183
    The census apartment and the census building table contains information
184
    about the heating type. The information are loaded from the apartment
185
    table, because they might be more useful when it comes to the estimation of
186
    the connection rates. Only cells with a connection rate equal to or larger
187
    than 30% (based on the census apartment data) are included in the returned
188
    district_heat GeoDataFrame.
189
190
    Parameters
191
    ----------
192
        None
193
194
    Returns
195
    -------
196
    district_heat: geopandas.geodataframe.GeoDataFrame
197
        polygons (hectare cells) with district heat information
198
199
    heating_type: geopandas.geodataframe.GeoDataFrame
200
        polygons (hectare cells) with the number of flats having heating
201
        type information
202
203
    Notes
204
    -----
205
        The census contains only information on residential buildings.
206
        Therefore, also connection rate of the residential buildings can be
207
        estimated.
208
209
    TODO
210
    ----
211
        - ONLY load cells with flats.quantity_q <2
212
        - remove heating_type return, if not needed
213
        - store less columns in the district_heat (pop.geom_point),
214
            drop characteristics_text after use
215
216
    """
217
218
    # only census cells where egon-data has a heat demand are considered
219
220
    district_heat = db.select_geodataframe(
221
        """SELECT flats.zensus_population_id, flats.characteristics_text,
222
        flats.quantity, flats.quantity_q, pop.geom_point,
223
        pop.geom AS geom_polygon
224
        FROM society.egon_destatis_zensus_apartment_per_ha AS flats
225
        JOIN society.destatis_zensus_population_per_ha AS pop
226
        ON flats.zensus_population_id = pop.id
227
        AND flats.characteristics_text = 'Fernheizung (Fernwärme)'
228
        AND flats.zensus_population_id IN
229
        (SELECT zensus_population_id FROM demand.egon_peta_heat);""",
230
        index_col="zensus_population_id",
231
        geom_col="geom_polygon",
232
    )
233
234
    heating_type = db.select_geodataframe(
235
        """SELECT flats.zensus_population_id,
236
        SUM(flats.quantity) AS quantity, pop.geom AS geom_polygon
237
        FROM society.egon_destatis_zensus_apartment_per_ha AS flats
238
        JOIN society.destatis_zensus_population_per_ha AS pop
239
        ON flats.zensus_population_id = pop.id
240
        AND flats.attribute = 'HEIZTYP'
241
        AND flats.zensus_population_id IN
242
        (SELECT zensus_population_id FROM demand.egon_peta_heat)
243
        GROUP BY flats.zensus_population_id, pop.geom;""",
244
        index_col="zensus_population_id",
245
        geom_col="geom_polygon",
246
    )
247
248
    # district_heat.to_file(results_path+"dh.shp")
249
    # heating_type.to_file(results_path+"heating.shp")
250
251
    # calculate the connection rate for all census cells with DH
252
    # adding it to the district_heat geodataframe
253
    district_heat["connection_rate"] = district_heat["quantity"].div(
254
        heating_type["quantity"]
255
    )[district_heat.index]
256
    # district_heat.head
257
    # district_heat['connection_rate'].describe()
258
259
    district_heat = district_heat[
260
        district_heat["connection_rate"] >= minimum_connection_rate
261
    ]
262
    # district_heat.columns
263
264
    return district_heat, heating_type
265
266
267
def load_heat_demands(scenario_name):
268
    """
269
    Load scenario specific heat demand data from the local database.
270
271
    Parameters
272
    ----------
273
    scenario_name: str
274
        name of the scenario studied
275
276
    Returns
277
    -------
278
    heat_demand: geopandas.geodataframe.GeoDataFrame
279
        polygons (hectare cells) with heat demand data
280
281
    """
282
283
    # load the total heat demand (residential plus service sector)
284
    heat_demand = db.select_geodataframe(
285
        f"""SELECT demand.zensus_population_id,
286
        SUM(demand.demand) AS residential_and_service_demand,
287
        pop.geom AS geom_polygon
288
        FROM demand.egon_peta_heat AS demand
289
        JOIN society.destatis_zensus_population_per_ha AS pop
290
        ON demand.zensus_population_id = pop.id
291
        AND demand.scenario = '{scenario_name}'
292
        GROUP BY demand.zensus_population_id, pop.geom;""",
293
        index_col="zensus_population_id",
294
        geom_col="geom_polygon",
295
    )
296
297
    return heat_demand
298
299
300
def select_high_heat_demands(heat_demand):
301
    """
302
    Take heat demand cells and select cells with higher heat demand.
303
304
    Those can be used to identify prospective district heating supply areas.
305
306
    Parameters
307
    ----------
308
    heat_demand: geopandas.geodataframe.GeoDataFrame
309
        dataset of heat demand cells.
310
311
    Returns
312
    -------
313
    high_heat_demand: geopandas.geodataframe.GeoDataFrame
314
             polygons (hectare cells) with heat demands high enough to be
315
             potentially high enough to be in a district heating area
316
    """
317
318
    # starting point are 100 or 200 GJ/ (ha a), converted into MWh
319
    minimum_demand = 100 / 3.6
320
321
    high_heat_demand = heat_demand[
322
        heat_demand["residential_and_service_demand"] > minimum_demand
323
    ]
324
    # high_heat_demand.head()
325
    # print(high_heat_demand.area) # all cells are 10,000 m²
326
327
    return high_heat_demand
328
329
330
def area_grouping(
331
    raw_polygons,
332
    distance=200,
333
    minimum_total_demand=None,
334
    maximum_total_demand=None,
335
):
336
    """
337
    Group polygons which are close to each other.
338
339
    This function creates buffers around the given cell polygons (called
340
    "raw_polygons") and unions the intersecting buffer polygons. Afterwards, it
341
    unions the cell polygons which are within one unified buffer polygon.
342
    If requested, the cells being in areas fulfilling the minimum heat demand
343
    criterium are selected.
344
345
    Parameters
346
    ----------
347
    raw_polygons: geopandas.geodataframe.GeoDataFrame
348
        polygons to be grouped.
349
350
    distance: integer
351
        distance for buffering
352
353
    minimum_total_demand: integer
354
        optional minimum total heat demand to achieve a minimum size of areas
355
356
    maximal_total_demand: integer
357
        optional maximal total heat demand per area, if demand is higher the
358
        area is cut at nuts3 borders
359
360
361
    Returns
362
    -------
363
    join: geopandas.geodataframe.GeoDataFrame
364
        cell polygons with area id
365
366
    """
367
368
    buffer_distance = distance + 1
369
    cell_buffers = raw_polygons.copy()
370
    cell_buffers["geom_polygon"] = cell_buffers["geom_polygon"].buffer(
371
        buffer_distance
372
    )
373
    # print(cell_buffers.area)
374
375
    # create a shapely Multipolygon which is split into a list
376
    buffer_polygons = list(cell_buffers["geom_polygon"].unary_union.geoms)
377
378
    # change the data type into geopandas geodataframe
379
    buffer_polygons_gdf = gpd.GeoDataFrame(geometry=buffer_polygons, crs=3035)
380
    # buffer_polygons_gdf.plot()
381
382
    # Join studied cells with buffer polygons
383
    columnname = "area_id"
384
    join = gpd.sjoin(
385
        raw_polygons, buffer_polygons_gdf, how="inner", predicate="intersects"
386
    )
387
388
    join = join.rename({"index_right": columnname}, axis=1)
389
    # join.plot(column=columnname)
390
391
    # minimum total heat demand for the areas with minimum criterium
392
    if (
393
        minimum_total_demand is not None
394
        and "residential_and_service_demand" in raw_polygons.columns
395
    ):
396
        # total_heat_demand = join.dissolve('area_id', aggfunc='sum')
397
        # type(large_areas)
398
        # filtered = join.groupby(['area_id'])[
399
        #     'residential_and_service_demand'].agg('sum') > 0.7
400
        large_areas = gpd.GeoDataFrame(
401
            join.groupby(["area_id"])["residential_and_service_demand"].agg(
402
                "sum"
403
            )
404
        )
405
        # large_areas = large_areas[large_areas[
406
        #     'residential_and_service_demand'] > minimum_total_demand]
407
        large_areas = (
408
            large_areas["residential_and_service_demand"]
409
            > minimum_total_demand
410
        )
411
        join = join[join.area_id.isin(large_areas[large_areas].index)]
412
413
    elif (
414
        minimum_total_demand is not None
415
        and "residential_and_service_demand" not in raw_polygons.columns
416
    ):
417
        print(
418
            """The minimum total heat demand criterium can only be applied
419
              on geodataframe having a column named
420
              'residential_and_service_demand' """
421
        )
422
423
    if (
424
        maximum_total_demand
425
        and "residential_and_service_demand" in join.columns
426
    ):
427
428
        huge_areas_index = (
429
            join.groupby("area_id").residential_and_service_demand.sum()
430
            > maximum_total_demand
431
        )
432
433
        cells_in_huge_areas = join[
434
            join.area_id.isin(huge_areas_index[huge_areas_index].index)
435
        ]
436
437
        nuts3_boundaries = db.select_geodataframe(
438
            """
439
            SELECT gen, geometry as geom FROM boundaries.vg250_krs
440
            """
441
        )
442
        join_2 = gpd.sjoin(
443
            cells_in_huge_areas,
444
            nuts3_boundaries,
445
            how="inner",
446
            predicate="intersects",
447
        )
448
449
        join = join.drop(cells_in_huge_areas.index)
450
451
        max_area_id = join.area_id.max()
452
453
        join_2["area_id"] = join_2.index_right + max_area_id + 1
454
455
        join = pd.concat(
456
            [
457
                join,
458
                join_2[
459
                    [
460
                        "zensus_population_id",
461
                        "residential_and_service_demand",
462
                        "geom_polygon",
463
                        "area_id",
464
                    ]
465
                ],
466
            ],
467
            ignore_index=True,
468
        )
469
470
    return join
471
472
473
def district_heating_areas(scenario_name, plotting=False):
474
    """
475
    Create scenario specific district heating areas considering on census data.
476
477
    This function loads the district heating share from the scenario table and
478
    demarcate the scenario specific district heating areas. To do so it
479
    uses the census data on flats currently supplied with district heat, which
480
    are supplied selected first, if the estimated connection rate >= 30%.
481
482
    All scenarios use the Prospective Supply Districts (PSDs) made for the
483
    eGon2035 scenario to identify the areas where additional district heating
484
    supply is feasible. One PSD dataset is to defined which is constant over
485
    the years to allow comparisons. Moreover, it is
486
    assumed that the eGon2035 PSD dataset is suitable, even though the heat
487
    demands will continue to decrease from 2035 to 2050, because district
488
    heating systems will be to planned and built before 2050, to exist in 2050.
489
490
    It is assumed that the connection rate in cells with district heating will
491
    be a 100%. That is because later in project the number of buildings per
492
    cell will be used and connection rates not being 0 or 100% will create
493
    buildings which are not fully supplied by one technology.
494
495
    The cell polygons which carry information (like heat demand etc.) are
496
    grouped into areas which are close to each other.
497
    Only cells with a minimum heat demand density (e.g. >100 GJ/(ha a)) are
498
    considered when creating PSDs. Therefore, the select_high_heat_demands()
499
    function is used. There is minimum heat demand per PSDs to achieve a
500
    certain size.
501
    While the grouping buffer for the creation of Prospective Supply Districts
502
    (PSDs) is 200m as in the sEEnergies project, the buffer for grouping census
503
    data cell with an estimated connection rate >= 30% is 500m.
504
    The 500m buffer is also used when the resulting district heating areas are
505
    grouped, because they are built upon the existing district heating systems.
506
507
    To reduce the final number of district heating areas having the size of
508
    only one hectare, the minimum heat demand critrium is also applied when
509
    grouping the cells with census data on district heat.
510
511
    To avoid huge district heating areas, as they appear in the Ruhr area,
512
    district heating areas with an annual demand > 4,000,000 MWh are split
513
    by nuts3 boundaries. This as set as maximum_total_demand of the
514
    area_grouping function.
515
516
517
    Parameters
518
    ----------
519
    scenario_name: str
520
        name of scenario to be studies
521
522
    plotting: boolean
523
        if True, figure showing the heat demand density curve will be created
524
525
526
    Returns
527
    -------
528
        None
529
530
    Notes
531
    -----
532
        None
533
534
    TODO
535
    ----
536
        Do "area_grouping(load_census_data()[0])" only once, not for all
537
        scenarios.
538
539
        Check the applied buffer distances, find a justification for the
540
        documentation
541
542
    """
543
544
    # Load district heating shares from the scenario table
545
    if scenario_name == "eGon2015":
546
        district_heating_share = 0.08
547
    else:
548
        heat_parameters = get_sector_parameters("heat", scenario=scenario_name)
549
550
        district_heating_share = heat_parameters["DE_district_heating_share"]
551
552
    minimum_connection_rate = 0.3
553
554
    # Adjust minimum connection rate for status2019, and other statusquo scn
555
    # otherwise the existing district heating grids would have too much demand
556
    # if scenario_name == "status2019":
557
    if "status" in scenario_name:
558
        minimum_connection_rate = 0.6
559
560
    # heat_demand is scenario specific
561
    heat_demand_cells = load_heat_demands(scenario_name)
562
563
    # Firstly, supply the cells which already have district heating according
564
    # to 2011 Census data and which are within likely dh areas (created
565
    # by the area grouping function), load only the first returned result: [0]
566
    min_hd_census = 10000 / 3.6  # in MWh
567
568
    census_plus_heat_demand = load_census_data(
569
        minimum_connection_rate=minimum_connection_rate
570
    )[0].copy()
571
    census_plus_heat_demand["residential_and_service_demand"] = (
572
        heat_demand_cells.loc[
573
            census_plus_heat_demand.index.values,
574
            "residential_and_service_demand",
575
        ]
576
    )
577
578
    cells = area_grouping(
579
        census_plus_heat_demand,
580
        distance=500,
581
        minimum_total_demand=min_hd_census,
582
    )
583
    # cells.groupby("area_id").size().sort_values()
584
585
    total_district_heat = (
586
        heat_demand_cells["residential_and_service_demand"].sum()
587
        * district_heating_share
588
    )
589
590
    diff = total_district_heat - cells["residential_and_service_demand"].sum()
591
592
    assert (
593
        diff > 0
594
    ), """The chosen district heating share in combination with the heat
595
        demand reduction leads to an amount of district heat which is
596
        lower than the current one. This case is not implemented yet."""
597
598
    # Secondly, supply the cells with the highest heat demand not having
599
    # district heating yet
600
    # ASSUMPTION HERE: 2035 HD defined the PSDs
601
    min_hd = 10000 / 3.6
602
    PSDs = area_grouping(
603
        select_high_heat_demands(load_heat_demands(scenario_name)),
604
        distance=200,
605
        minimum_total_demand=min_hd,
606
    )
607
608
    # PSDs.groupby("area_id").size().sort_values()
609
610
    # select all cells not already suppied with district heat
611
    new_areas = heat_demand_cells[~heat_demand_cells.index.isin(cells.index)]
612
    # sort by heat demand density
613
    new_areas = new_areas[new_areas.index.isin(PSDs.index)].sort_values(
614
        "residential_and_service_demand", ascending=False
615
    )
616
    new_areas["Cumulative_Sum"] = (
617
        new_areas.residential_and_service_demand.cumsum()
618
    )
619
    # select cells to be supplied with district heating until district
620
    # heating share is reached
621
    new_areas = new_areas[new_areas["Cumulative_Sum"] <= diff]
622
623
    print(
624
        f"""Minimum heat demand density for cells with new district heat
625
          supply in scenario {scenario_name} is
626
          {new_areas.residential_and_service_demand.tail(1).values[0]}
627
          MWh / (ha a)."""
628
    )
629
    print(
630
        f"""Number of cells with new district heat supply in scenario
631
          {scenario_name} is {len(new_areas)}."""
632
    )
633
634
    # check = gpd.GeoDataFrame(
635
    #     cells[['residential_and_service_demand', 'geom_polygon']].append(
636
    #         new_areas[['residential_and_service_demand', 'geom_polygon']]),
637
    #     geometry='geom_polygon')
638
639
    # group the resulting scenario specific district heating areas
640
    scenario_dh_area = area_grouping(
641
        pd.concat(
642
            [
643
                cells[["residential_and_service_demand", "geom_polygon"]],
644
                new_areas[["residential_and_service_demand", "geom_polygon"]],
645
            ]
646
        ).reset_index(),
647
        distance=500,
648
        maximum_total_demand=4e6,
649
    )
650
    scenario_dh_area.loc[:, "zensus_population_id"] = scenario_dh_area.loc[
651
        :, "zensus_population_id"
652
    ].astype(int)
653
    # scenario_dh_area.plot(column = "area_id")
654
655
    scenario_dh_area.groupby("area_id").size().sort_values()
656
    scenario_dh_area.residential_and_service_demand.sum()
657
    # scenario_dh_area.sort_index()
658
    # cells[cells.index==1416974]
659
660
    # store the results in the database
661
    scenario_dh_area["scenario"] = scenario_name
662
663
    db.execute_sql(
664
        f"""DELETE FROM demand.egon_map_zensus_district_heating_areas
665
                   WHERE scenario = '{scenario_name}'"""
666
    )
667
    scenario_dh_area[["scenario", "area_id", "zensus_population_id"]].to_sql(
668
        "egon_map_zensus_district_heating_areas",
669
        schema="demand",
670
        con=db.engine(),
671
        if_exists="append",
672
        index=False,
673
    )
674
675
    # Create polygons around the grouped cells and store them in the database
676
    # join.dissolve(columnname).convex_hull.plot() # without holes, too big
677
    areas_dissolved = scenario_dh_area.dissolve("area_id", aggfunc="sum")
678
    areas_dissolved["scenario"] = scenario_name
679
680
    areas_dissolved["geom_polygon"] = [
681
        MultiPolygon([feature]) if type(feature) == Polygon else feature
682
        for feature in areas_dissolved["geom_polygon"]
683
    ]
684
    # type(areas_dissolved["geom"][0])
685
    # print(type(areas_dissolved))
686
    # print(areas_dissolved.head())
687
688
    if len(areas_dissolved[areas_dissolved.area == 100 * 100]) > 0:
689
        print(
690
            f"""Number of district heating areas of single zensus cells:
691
              {len(areas_dissolved[areas_dissolved.area == 100*100])
692
               }"""
693
        )
694
        # print(f"""District heating areas ids of single zensus cells in
695
        #       district heating areas:
696
        #       {areas_dissolved[areas_dissolved.area == 100*100].index.values
697
        #        }""")
698
        # print(f"""Zensus_population_ids of single zensus cells
699
        #       in district heating areas:
700
        #       {scenario_dh_area[scenario_dh_area.area_id.isin(
701
        #           areas_dissolved[areas_dissolved.area == 100*100].index.values
702
        #           )].index.values}""")
703
704
    db.execute_sql(
705
        f"""DELETE FROM demand.egon_district_heating_areas
706
                   WHERE scenario = '{scenario_name}'"""
707
    )
708
    areas_dissolved.reset_index().drop(
709
        "zensus_population_id", axis="columns"
710
    ).to_postgis(
711
        "egon_district_heating_areas",
712
        schema="demand",
713
        con=db.engine(),
714
        if_exists="append",
715
    )
716
    # Alternative:
717
    # join.groupby("columnname").demand.sum()
718
    # add the sorted heat demand density curve
719
    no_district_heating = heat_demand_cells[
720
        ~heat_demand_cells.index.isin(scenario_dh_area.index)
721
    ]
722
    collection = pd.concat(
723
        [
724
            cells.sort_values(
725
                "residential_and_service_demand", ascending=False
726
            ),
727
            new_areas.sort_values(
728
                "residential_and_service_demand", ascending=False
729
            ),
730
            no_district_heating.sort_values(
731
                "residential_and_service_demand", ascending=False
732
            ),
733
        ],
734
        ignore_index=True,
735
    )
736
    collection["Cumulative_Sum"] = (
737
        collection.residential_and_service_demand.cumsum()
738
    ) / 1000000
739
    if plotting:
740
        plot_heat_density_sorted({scenario_name: collection}, scenario_name)
741
742
    return collection
743
744
745
def add_metadata():
746
    """
747
    Writes metadata JSON string into table comment.
748
749
    TODO
750
    ----
751
752
        Meta data must be check and adjusted to the egon_data standard:
753
            - Add context
754
            - authors and institutions
755
756
    """
757
758
    # Prepare variables
759
    license_district_heating_areas = [
760
        license_ccby("© Europa-Universität Flensburg")
761
    ]
762
763
    # Metadata creation for district heating areas (polygons)
764
    meta = {
765
        "name": "district_heating_areas_metadata",
766
        "title": "eGo^n scenario-specific future district heating areas",
767
        "description": "Modelled future district heating areas for "
768
        "the supply of residential and service-sector heat demands",
769
        "language": ["EN"],
770
        "publicationDate": datetime.date.today().isoformat(),
771
        "context": context(),
772
        "spatial": {"location": "", "extent": "Germany", "resolution": ""},
773
        "sources": [
774
            sources()["peta"],
775
            sources()["egon-data"],
776
            sources()["zensus"],
777
            sources()["vg250"],
778
        ],
779
        "resources": [
780
            {
781
                "profile": "tabular-data-resource",
782
                "name": "egon_district_heating_areas",
783
                "path": "",
784
                "format": "PostgreSQL",
785
                "encoding": "UTF-8",
786
                "schema": {
787
                    "fields": [
788
                        {
789
                            "name": "id",
790
                            "description": "Unique identifier",
791
                            "type": "serial",
792
                            "unit": "none",
793
                        },
794
                        {
795
                            "name": "area_id",
796
                            "description": "District heating area id",
797
                            "type": "integer",
798
                            "unit": "none",
799
                        },
800
                        {
801
                            "name": "scenario",
802
                            "description": "scenario name",
803
                            "type": "text",
804
                            "unit": "none",
805
                        },
806
                        {
807
                            "name": "residential_and_service_demand",
808
                            "description": "annual heat demand",
809
                            "type": "double precision",
810
                            "unit": "MWh",
811
                        },
812
                        {
813
                            "name": "geom_polygon",
814
                            "description": "geo information of multipolygons",
815
                            "type": "geometry(MULTIPOLYGON, 3035)",
816
                            "unit": "none",
817
                        },
818
                    ],
819
                    "primaryKey": ["id"],
820
                    "foreignKeys": [
821
                        {
822
                            "fields": ["scenario"],
823
                            "reference": {
824
                                "resource": "scenario.egon_scenario_parameters",
825
                                "fields": ["name"],
826
                            },
827
                        }
828
                    ],
829
                },
830
                "dialect": {"delimiter": "none", "decimalSeparator": "."},
831
            }
832
        ],
833
        "licenses": license_district_heating_areas,
834
        "contributors": [
835
            {
836
                "title": "EvaWie",
837
                "email": "http://github.com/EvaWie",
838
                "date": time.strftime("%Y-%m-%d"),
839
                "object": None,
840
                "comment": "Imported data",
841
            },
842
            {
843
                "title": "Clara Büttner",
844
                "email": "http://github.com/ClaraBuettner",
845
                "date": time.strftime("%Y-%m-%d"),
846
                "object": None,
847
                "comment": "Updated metadata",
848
            },
849
        ],
850
        "metaMetadata": meta_metadata(),
851
    }
852
    meta_json = "'" + json.dumps(meta) + "'"
853
854
    db.submit_comment(meta_json, "demand", "egon_district_heating_areas")
855
856
    # Metadata creation for "id mapping" table
857
    meta = {
858
        "name": "map_zensus_district_heating_areas_metadata",
859
        "title": "district heating area ids assigned to zensus_population_ids",
860
        "description": "Ids of scenario specific future district heating areas"
861
        " for supply of residential and service-sector heat demands"
862
        " assigned to zensus_population_ids",
863
        "language": ["EN"],
864
        "publicationDate": datetime.date.today().isoformat(),
865
        "context": context(),
866
        "spatial": {"location": "", "extent": "Germany", "resolution": ""},
867
        "sources": [
868
            sources()["peta"],
869
            sources()["egon-data"],
870
            sources()["zensus"],
871
            sources()["vg250"],
872
        ],
873
        # Add the license for the map table
874
        "resources": [
875
            {
876
                "profile": "tabular-data-resource",
877
                "name": "egon_map_zensus_district_heating_areas",
878
                "path": "",
879
                "format": "PostgreSQL",
880
                "encoding": "UTF-8",
881
                "schema": {
882
                    "fields": [
883
                        {
884
                            "name": "id",
885
                            "description": "Unique identifier",
886
                            "type": "serial",
887
                            "unit": "none",
888
                        },
889
                        {
890
                            "name": "area_id",
891
                            "description": "district heating area id",
892
                            "type": "integer",
893
                            "unit": "none",
894
                        },
895
                        {
896
                            "name": "scenario",
897
                            "description": "scenario name",
898
                            "type": "text",
899
                            "unit": "none",
900
                        },
901
                    ],
902
                    "primaryKey": ["id"],
903
                    "foreignKeys": [
904
                        {
905
                            "fields": ["zensus_population_id"],
906
                            "reference": {
907
                                "resource": "society.destatis_zensus_population_per_ha",
908
                                "fields": ["id"],
909
                            },
910
                        },
911
                        {
912
                            "fields": ["scenario"],
913
                            "reference": {
914
                                "resource": "scenario.egon_scenario_parameters",
915
                                "fields": ["name"],
916
                            },
917
                        },
918
                    ],
919
                },
920
                "dialect": {"delimiter": "none", "decimalSeparator": "."},
921
            }
922
        ],
923
        "licenses": license_district_heating_areas,
924
        "contributors": [
925
            {
926
                "title": "EvaWie",
927
                "email": "http://github.com/EvaWie",
928
                "date": time.strftime("%Y-%m-%d"),
929
                "object": None,
930
                "comment": "Imported data",
931
            },
932
            {
933
                "title": "Clara Büttner",
934
                "email": "http://github.com/ClaraBuettner",
935
                "date": time.strftime("%Y-%m-%d"),
936
                "object": None,
937
                "comment": "Updated metadata",
938
            },
939
        ],
940
        "metaMetadata": meta_metadata(),
941
    }
942
    meta_json = "'" + json.dumps(meta) + "'"
943
944
    db.submit_comment(
945
        meta_json, "demand", "egon_map_zensus_district_heating_areas"
946
    )
947
948
    return None
949
950
951
def study_prospective_district_heating_areas():
952
    """
953
    Get information about Prospective Supply Districts for district heating.
954
955
    This optional function executes the functions so that you can study the
956
    heat demand density data of different scenarios and compare them and the
957
    resulting Prospective Supply Districts (PSDs) for district heating. This
958
    functions saves local shapefiles, because these data are not written into
959
    database. Moreover, heat density curves are drawn.
960
    This function is tailor-made and includes the scenarios eGon2035 and
961
    eGon100RE.
962
963
    Parameters
964
    ----------
965
        None
966
967
    Returns
968
    -------
969
        None
970
971
    Notes
972
    -----
973
        None
974
975
    TODO
976
    ----
977
        PSD statistics (average PSD connection rate, total HD per PSD) could
978
        be studied
979
    """
980
981
    # create directory to store files
982
    results_path = "district_heating_areas/"
983
984
    if not os.path.exists(results_path):
985
        os.mkdir(results_path)
986
987
    # load the total heat demand by census cell (residential plus service)
988
    # HD_2015 = load_heat_demands('eGon2015')
989
    # status quo heat demand data are part of the regluar database content
990
    # to get them, line 463 ("if not '2015' in source.stem:") has to be
991
    # deleted from
992
    # importing/heat_demand_data/__init__.py
993
    # and an empty row has to be added to scenario table:
994
    # INSERT INTO scenario.egon_scenario_parameters (name)
995
    # VALUES ('eGon2015');
996
    # because egon2015 is not part of the regular EgonScenario table!
997
    HD_2035 = load_heat_demands("eGon2035")
998
    HD_2050 = load_heat_demands("eGon100RE")
999
1000
    # select only cells with heat demands > 100 GJ / (ha a)
1001
    # HD_2015_above_100GJ = select_high_heat_demands(HD_2015)
1002
    HD_2035_above_100GJ = select_high_heat_demands(HD_2035)
1003
    HD_2050_above_100GJ = select_high_heat_demands(HD_2050)
1004
1005
    # PSDs
1006
    # grouping cells applying the 201m distance buffer, including heat demand
1007
    # aggregation
1008
    # after decision for one year/scenario (here 2035), in the pipeline PSDs
1009
    # are only calculeated for the one selected year/scenario;
1010
    # here you can see all years/scenarios:
1011
    # PSD_2015_201m = area_grouping(HD_2015_above_100GJ, distance=200,
1012
    #                               minimum_total_demand=(10000/3.6)
1013
    #                                ).dissolve('area_id', aggfunc='sum')
1014
    # PSD_2015_201m.to_file(results_path+"PSDs_2015based.shp")
1015
    PSD_2035_201m = area_grouping(
1016
        HD_2035_above_100GJ, distance=200, minimum_total_demand=(10000 / 3.6)
1017
    ).dissolve("area_id", aggfunc="sum")
1018
    # HD_2035.to_file(results_path+"HD_2035.shp")
1019
    # HD_2035_above_100GJ.to_file(results_path+"HD_2035_above_100GJ.shp")
1020
    PSD_2035_201m.to_file(results_path + "PSDs_2035based.shp")
1021
    PSD_2050_201m = area_grouping(
1022
        HD_2050_above_100GJ, distance=200, minimum_total_demand=(10000 / 3.6)
1023
    ).dissolve("area_id", aggfunc="sum")
1024
    PSD_2050_201m.to_file(results_path + "PSDs_2050based.shp")
1025
1026
    # plotting all cells - not considering census data
1027
    # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
1028
    # https://www.earthdatascience.org/courses/scientists-guide-to-plotting-data-in-python/plot-with-matplotlib/introduction-to-matplotlib-plots/customize-plot-colors-labels-matplotlib/
1029
    fig, ax = plt.subplots(1, 1)
1030
    # add the sorted heat demand densities
1031
    # HD_2015 = HD_2015.sort_values('residential_and_service_demand',
1032
    #                               ascending=False).reset_index()
1033
    # HD_2015["Cumulative_Sum"] = (HD_2015.residential_and_service_demand.
1034
    #                              cumsum()) / 1000000
1035
    # ax.plot(HD_2015.Cumulative_Sum,
1036
    #         HD_2015.residential_and_service_demand, label='eGon2015')
1037
1038
    HD_2035 = HD_2035.sort_values(
1039
        "residential_and_service_demand", ascending=False
1040
    ).reset_index()
1041
    HD_2035["Cumulative_Sum"] = (
1042
        HD_2035.residential_and_service_demand.cumsum()
1043
    ) / 1000000
1044
    ax.plot(
1045
        HD_2035.Cumulative_Sum,
1046
        HD_2035.residential_and_service_demand,
1047
        label="eGon2035",
1048
    )
1049
1050
    HD_2050 = HD_2050.sort_values(
1051
        "residential_and_service_demand", ascending=False
1052
    ).reset_index()
1053
    HD_2050["Cumulative_Sum"] = (
1054
        HD_2050.residential_and_service_demand.cumsum()
1055
    ) / 1000000
1056
    ax.plot(
1057
        HD_2050.Cumulative_Sum,
1058
        HD_2050.residential_and_service_demand,
1059
        label="eGon100RE",
1060
    )
1061
1062
    # add the district heating shares
1063
1064
    heat_parameters = get_sector_parameters("heat", "eGon2035")
1065
    district_heating_share_2035 = heat_parameters["DE_district_heating_share"]
1066
    plt.axvline(
1067
        x=HD_2035.residential_and_service_demand.sum()
1068
        / 1000000
1069
        * district_heating_share_2035,
1070
        ls=":",
1071
        lw=0.5,
1072
        label="72TWh DH in 2035 in Germany => 14% DH",
1073
        color="black",
1074
    )
1075
    heat_parameters = get_sector_parameters("heat", "eGon100RE")
1076
    district_heating_share_100RE = heat_parameters["DE_district_heating_share"]
1077
    plt.axvline(
1078
        x=HD_2050.residential_and_service_demand.sum()
1079
        / 1000000
1080
        * district_heating_share_100RE,
1081
        ls="-.",
1082
        lw=0.5,
1083
        label="75TWh DH in 100RE in Germany => 19% DH",
1084
        color="black",
1085
    )
1086
1087
    # axes meet in (0/0)
1088
    ax.margins(x=0, y=0)  # default is 0.05
1089
    # axis style
1090
    # https://matplotlib.org/stable/gallery/ticks_and_spines/centered_spines_with_arrows.html
1091
    # Hide the right and top spines
1092
    ax.spines["right"].set_visible(False)
1093
    ax.spines["top"].set_visible(False)
1094
    ax.plot(1, 0, ">k", transform=ax.get_yaxis_transform(), clip_on=False)
1095
    ax.plot(0, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False)
1096
1097
    ax.set(title="Heat Demand in eGo^n")
1098
    ax.set_xlabel("Cumulative Heat Demand [TWh / a]")
1099
    ax.set_ylabel("Heat Demand Densities [MWh / (ha a)]")
1100
1101
    plt.legend()
1102
    plt.savefig(results_path + "Complete_HeatDemandDensities_Curves.png")
1103
1104
    return None
1105
1106
1107
def demarcation(plotting=True):
1108
    """
1109
    Load scenario specific district heating areas with metadata into database.
1110
1111
    This function executes the functions that identifies the areas which will
1112
    be supplied with district heat in the two eGo^n scenarios. The creation of
1113
    heat demand density curve figures is optional. So is also the export of
1114
    scenario specific Prospective Supply Districts for district heating (PSDs)
1115
    as shapefiles including the creation of a figure showing the comparison
1116
    of sorted heat demand densities.
1117
1118
    The method was executed for 2015, 2035 and 2050 to find out which
1119
    scenario year defines the PSDs. The year 2035 was selected and
1120
    the function was adjusted accordingly.
1121
    If you need the 2015 scenario heat demand data, please have a look at
1122
    the heat demand script commit 270bea50332016447e869f69d51e96113073b8a0,
1123
    where the 2015 scenario was deactivated. You can study the 2015 PSDs in
1124
    the study_prospective_district_heating_areas function after
1125
    un-commenting some lines.
1126
1127
    Parameters
1128
    ----------
1129
    plotting: boolean
1130
        if True, figure showing the heat demand density curve will be created
1131
1132
    Returns
1133
    -------
1134
        None
1135
1136
    Notes
1137
    -----
1138
        None
1139
1140
    TODO
1141
    ----
1142
        Create diagrams/curves, make better curves with matplotlib
1143
1144
        Make PSD and DH system statistics
1145
        Check if you need the current / future number of DH
1146
        supplied flats and the total number of flats to calculate the
1147
        connection rate
1148
1149
        Add datasets to datasets configuration
1150
1151
    """
1152
1153
    # load the census district heat data on apartments, and group them
1154
    # This is currently done in the grouping function:
1155
    # district_heat_zensus, heating_type_zensus = load_census_data()
1156
    # Zenus_DH_areas_201m = area_grouping(district_heat_zensus)
1157
1158
    heat_density_per_scenario = {}
1159
    # scenario specific district heating areas
1160
1161
    for scenario in config.settings()["egon-data"]["--scenarios"]:
1162
        heat_density_per_scenario[scenario] = district_heating_areas(
1163
            scenario, plotting
1164
        )
1165
1166
    if plotting:
1167
        plot_heat_density_sorted(heat_density_per_scenario)
1168
    # if you want to study/export the Prospective Supply Districts (PSDs)
1169
    # for all scenarios
1170
    # study_prospective_district_heating_areas()
1171
1172
    add_metadata()
1173
1174
    return None
1175