Passed
Pull Request — dev (#1170)
by
unknown
05:05
created

data.datasets.ch4_prod.import_gas_generators()   B

Complexity

Conditions 4

Size

Total Lines 116
Code Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 39
dl 0
loc 116
rs 8.9439
c 0
b 0
f 0
cc 4
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

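Commonly applied refactorings include Extract Method, i.e. moving a cohesive (often commented) part of the body into its own well-named method.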

1
# -*- coding: utf-8 -*-
2
"""
3
The central module containing code dealing with importing CH4 production data for eGon2035.
4
5
For eGon2035, the gas produced in Germany can be natural gas or biogas.
6
The source productions are geolocalised potentials described as PyPSA
7
generators. These generators are not extendable and their overall
8
production over the year is limited directly in eTraGo by values from
9
the Netzentwicklungsplan Gas 2020–2030 (36 TWh natural gas and 10 TWh
10
biogas), also stored in the table
11
:py:class:`scenario.egon_scenario_parameters <egon.data.datasets.scenario_parameters.EgonScenario>`.
12
13
"""
14
from pathlib import Path
15
from urllib.request import urlretrieve
16
import ast
17
18
import geopandas as gpd
19
import numpy as np
20
import pandas as pd
21
22
from egon.data import config, db
23
from egon.data.config import settings
24
from egon.data.datasets import Dataset
25
from egon.data.datasets.scenario_parameters import get_sector_parameters
26
27
28
class CH4Production(Dataset):
    """
    Insert the CH4 productions into the database for eGon2035

    The work is delegated to a single task, the function
    :py:func:`import_gas_generators`.

    *Dependencies*
      * :py:class:`GasAreaseGon2035 <egon.data.datasets.gas_areas.GasAreaseGon2035>`
      * :py:class:`GasNodesAndPipes <egon.data.datasets.gas_grid.GasNodesAndPipes>`

    *Resulting tables*
      * :py:class:`grid.egon_etrago_generator <egon.data.datasets.etrago_setup.EgonPfHvGenerator>` is extended

    """

    #:
    name: str = "CH4Production"
    #:
    version: str = "0.0.9"

    def __init__(self, dependencies):
        # ``tasks`` receives the bare callable (a single task, not a tuple).
        dataset_options = {
            "name": self.name,
            "version": self.version,
            "dependencies": dependencies,
            "tasks": import_gas_generators,
        }
        super().__init__(**dataset_options)
57
58
59
def load_NG_generators(scn_name):
    """
    Define the fossil CH4 production units in Germany

    This function reads from the SciGRID_gas dataset the fossil CH4
    production units in Germany, adjusts and returns them.
    Natural gas production reference: SciGRID_gas dataset (datasets/gas_data/data/IGGIELGN_Productions.csv
    downloaded in :func:`download_SciGRID_gas_data <egon.data.datasets.gas_grid.download_SciGRID_gas_data>`).
    For more information on this data, refer to the
    `SciGRID_gas IGGIELGN documentation <https://zenodo.org/record/4767098>`_.

    Parameters
    ----------
    scn_name : str
        Name of the scenario.

    Returns
    -------
    NG_generators_list : geopandas.GeoDataFrame
        Natural gas production units in Germany with the columns
        ``geom`` (point geometry, EPSG:4326), ``p_nom`` and
        ``marginal_cost``.

    Raises
    ------
    KeyError
        If the configured dataset boundary is neither "Everything" nor
        one of the federal states known to this function.

    """
    # Read carrier information from scenario parameter data
    scn_params = get_sector_parameters("gas", scn_name)

    target_file = (
        Path(".")
        / "datasets"
        / "gas_data"
        / "data"
        / "IGGIELGN_Productions.csv"
    )

    NG_generators_list = pd.read_csv(
        target_file,
        delimiter=";",
        decimal=".",
        usecols=["lat", "long", "country_code", "param"],
    )

    NG_generators_list = NG_generators_list[
        NG_generators_list["country_code"].str.match("DE")
    ]

    # The ``param`` column holds stringified dictionaries. Parse each of
    # them exactly once and keep the parsed dictionaries in the column so
    # that they stay aligned with the rows through the filtering below.
    NG_generators_list = NG_generators_list.assign(
        param=[ast.literal_eval(raw) for raw in NG_generators_list["param"]]
    )
    NG_generators_list = NG_generators_list.assign(
        NUTS1=[param["nuts_id_1"] for param in NG_generators_list["param"]]
    )

    # Cut data to federal state if in testmode
    boundary = settings()["egon-data"]["--dataset-boundary"]
    if boundary != "Everything":
        map_states = {
            "Baden-Württemberg": "DE1",
            "Nordrhein-Westfalen": "DEA",
            "Hessen": "DE7",
            "Brandenburg": "DE4",
            "Bremen": "DE5",
            "Rheinland-Pfalz": "DEB",
            "Sachsen-Anhalt": "DEE",
            "Schleswig-Holstein": "DEF",
            "Mecklenburg-Vorpommern": "DE8",
            "Thüringen": "DEG",
            "Niedersachsen": "DE9",
            "Sachsen": "DED",
            "Hamburg": "DE6",
            "Saarland": "DEC",
            "Berlin": "DE3",
            "Bayern": "DE2",
        }

        NG_generators_list = NG_generators_list[
            NG_generators_list["NUTS1"].isin([map_states[boundary], np.nan])
        ]

    NG_generators_list = NG_generators_list.rename(
        columns={"lat": "y", "long": "x"}
    )
    NG_generators_list = gpd.GeoDataFrame(
        NG_generators_list,
        geometry=gpd.points_from_xy(
            NG_generators_list["x"], NG_generators_list["y"]
        ),
    )
    NG_generators_list = NG_generators_list.rename(
        columns={"geometry": "geom"}
    ).set_geometry("geom", crs=4326)

    # Insert p_nom (only evaluated for the rows kept by the boundary cut)
    conversion_factor = 437.5  # MCM/day to MWh/h
    NG_generators_list["p_nom"] = [
        param["max_supply_M_m3_per_d"] * conversion_factor
        for param in NG_generators_list["param"]
    ]

    # Add missing columns
    NG_generators_list["marginal_cost"] = scn_params["marginal_cost"]["CH4"]

    # Remove useless columns
    NG_generators_list = NG_generators_list.drop(
        columns=["x", "y", "param", "country_code", "NUTS1"]
    )

    return NG_generators_list
166
167
168
def load_biogas_generators(scn_name):
    """
    Define the biogas production units in Germany

    This function downloads the Biogaspartner Einspeiseatlas into
    (datasets/gas_data/Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx),
    reads the biogas production units in Germany data, adjusts and
    returns them.
    For more information on this data refer to the
    `Einspeiseatlas website <https://www.biogaspartner.de/einspeiseatlas/>`_.

    If the dataset boundary is not "Everything", the units are written
    to the temporary table grid.egon_biogas_generator, intersected with
    boundaries.vg250_sta_union in the database and the temporary table
    is dropped again.

    Parameters
    ----------
    scn_name : str
        Name of the scenario

    Returns
    -------
    biogas_generators_list : geopandas.GeoDataFrame
        Biogas production units in Germany with the columns ``geom``
        (point geometry, EPSG:4326), ``p_nom`` and ``marginal_cost``.

    """
    # Read carrier information from scenario parameter data
    scn_params = get_sector_parameters("gas", scn_name)

    # Download file
    basename = "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx"
    url = (
        "https://www.biogaspartner.de/fileadmin/Biogaspartner/Dokumente/Einspeiseatlas/"
        + basename
    )
    target_file = Path(".") / "datasets" / "gas_data" / basename

    urlretrieve(url, target_file)

    # Read-in data from the Excel file
    biogas_generators_list = pd.read_excel(
        target_file,
        usecols=["Koordinaten", "Einspeisung Biomethan [(N*m^3)/h)]"],
    )

    # "Koordinaten" holds one comma separated pair per unit; the first
    # entry is used as y and the second one as x.
    coordinate_pairs = [
        coordinates.split(",")
        for coordinates in biogas_generators_list["Koordinaten"]
    ]
    biogas_generators_list["x"] = [pair[1] for pair in coordinate_pairs]
    biogas_generators_list["y"] = [pair[0] for pair in coordinate_pairs]

    biogas_generators_list = gpd.GeoDataFrame(
        biogas_generators_list,
        geometry=gpd.points_from_xy(
            biogas_generators_list["x"], biogas_generators_list["y"]
        ),
    )
    biogas_generators_list = biogas_generators_list.rename(
        columns={"geometry": "geom"}
    ).set_geometry("geom", crs=4326)

    # Connect to local database
    engine = db.engine()

    # Cut data to federal state if in testmode
    boundary = settings()["egon-data"]["--dataset-boundary"]
    if boundary != "Everything":
        drop_temporary_table = """
              DROP TABLE IF EXISTS grid.egon_biogas_generator CASCADE;
            """
        db.execute_sql(drop_temporary_table)
        try:
            biogas_generators_list.to_postgis(
                "egon_biogas_generator",
                engine,
                schema="grid",
                index=False,
                if_exists="replace",
            )

            sql = """SELECT *
            FROM grid.egon_biogas_generator, boundaries.vg250_sta_union  as vg
            WHERE ST_Transform(vg.geometry,4326) && egon_biogas_generator.geom
            AND ST_Contains(ST_Transform(vg.geometry,4326), egon_biogas_generator.geom)"""

            biogas_generators_list = gpd.GeoDataFrame.from_postgis(
                sql, con=engine, geom_col="geom", crs=4326
            )
        finally:
            # Never leave the temporary table behind, even if writing to
            # or reading from the database failed.
            db.execute_sql(drop_temporary_table)

        # Columns contributed by boundaries.vg250_sta_union in the join
        biogas_generators_list = biogas_generators_list.drop(
            columns=["id", "bez", "area_ha", "geometry"]
        )

    # Insert p_nom
    conversion_factor = 0.01083  # m^3/h to MWh/h
    biogas_generators_list["p_nom"] = [
        feed_in * conversion_factor
        for feed_in in biogas_generators_list[
            "Einspeisung Biomethan [(N*m^3)/h)]"
        ]
    ]

    # Add missing columns
    biogas_generators_list["marginal_cost"] = scn_params["marginal_cost"][
        "biogas"
    ]

    # Remove useless columns
    biogas_generators_list = biogas_generators_list.drop(
        columns=["x", "y", "Koordinaten", "Einspeisung Biomethan [(N*m^3)/h)]"]
    )
    return biogas_generators_list
281
282
283
def _load_eGon2035_CH4_generators(scn_name):
    """Return the natural gas and biogas generators aggregated per CH4 bus."""
    CH4_generators_list = pd.concat(
        [
            load_NG_generators(scn_name),
            load_biogas_generators(scn_name),
        ]
    )

    # Add missing columns
    CH4_generators_list = CH4_generators_list.assign(
        scn_name=scn_name, carrier="CH4"
    )

    # Match to associated CH4 bus
    CH4_generators_list = db.assign_gas_bus_id(
        CH4_generators_list, scn_name, "CH4"
    )

    # Remove useless columns
    CH4_generators_list = CH4_generators_list.drop(columns=["geom", "bus_id"])

    # Aggregate ch4 productions with same properties at the same bus
    return (
        CH4_generators_list.groupby(
            ["bus", "carrier", "scn_name", "marginal_cost"]
        )
        .agg({"p_nom": "sum"})
        .reset_index(drop=False)
    )


def _load_status_CH4_generators(scn_name):
    """Return one large CH4 generator for each CH4 bus of the scenario."""
    CH4_generators_list = db.select_dataframe(
        f"""
        SELECT bus_id as bus, scn_name, carrier
        FROM grid.egon_gas_voronoi
        WHERE scn_name = '{scn_name}'
        AND carrier = 'CH4'
        """
    )

    CH4_generators_list["marginal_cost"] = get_sector_parameters(
        "gas", scn_name
    )["marginal_cost"]["CH4"]
    CH4_generators_list["p_nom"] = 100000

    return CH4_generators_list


def import_gas_generators():
    """
    Inserts list of gas production units into the database

    The scenarios are read from the egon-data settings
    (``--scenarios``). For each of them, the following steps are
    followed:

      * cleaning of the target generator table: the CH4 generators of
        the scenario are deleted, except those attached to a bus whose
        country is not 'DE',
      * for the scenario eGon2035: call of the functions
        :py:func:`load_NG_generators` and
        :py:func:`load_biogas_generators` that respectively return
        dataframes containing the natural- and bio-gas production units
        in Germany,
      * attribution of the bus_id to which each generator is connected
        (call the function :func:`assign_gas_bus_id <egon.data.db.assign_gas_bus_id>`
        from :py:mod:`egon.data.db <egon.data.db>`),
      * aggregation of the CH4 productions with same properties at the
        same bus. The properties that should be the same in order that
        different generators are aggregated are:
          * scenario
          * carrier
          * marginal cost: this parameter differentiates the natural gas
            generators from the biogas generators,
      * for scenarios whose name contains "status": definition of one
        large CH4 generator at each CH4 bus,
      * addition of the missing columns: scn_name, carrier and
        generator_id,
      * insertion of the generators into the database.

    Scenarios that are neither eGon2035 nor a "status" scenario have no
    CH4 production defined here: their table entries are cleaned, a
    warning is logged and nothing is inserted.

    Returns
    -------
    None

    """
    import logging

    # Connect to local database
    engine = db.engine()

    # Select source and target from dataset configuration
    source = config.datasets()["gas_prod"]["source"]
    target = config.datasets()["gas_prod"]["target"]

    for scn_name in config.settings()["egon-data"]["--scenarios"]:
        # Clean table
        db.execute_sql(
            f"""
            DELETE FROM {target['stores']['schema']}.{target['stores']['table']}
            WHERE "carrier" = 'CH4' AND
            scn_name = '{scn_name}' AND bus not IN (
                SELECT bus_id FROM {source['buses']['schema']}.{source['buses']['table']}
                WHERE scn_name = '{scn_name}' AND country != 'DE'
            );
            """
        )

        if scn_name == "eGon2035":
            CH4_generators_list = _load_eGon2035_CH4_generators(scn_name)
        elif "status" in scn_name:
            CH4_generators_list = _load_status_CH4_generators(scn_name)
        else:
            # Without this branch the generator list would be unbound on
            # the first iteration, or still hold the generators of the
            # previous scenario, which would then be inserted again.
            logging.getLogger(__name__).warning(
                "No CH4 production is defined for scenario '%s'; "
                "no generators are inserted.",
                scn_name,
            )
            continue

        new_id = db.next_etrago_id("generator")
        CH4_generators_list["generator_id"] = range(
            new_id, new_id + len(CH4_generators_list)
        )

        # Insert data to db
        CH4_generators_list.to_sql(
            target["stores"]["table"],
            engine,
            schema=target["stores"]["schema"],
            index=False,
            if_exists="append",
        )
400