Passed
Pull Request — dev (#931)
by
unknown
01:31
created

data.datasets.ch4_prod.load_NG_generators()   B

Complexity

Conditions 4

Size

Total Lines 98
Code Lines 60

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 60
dl 0
loc 98
rs 8.309
c 0
b 0
f 0
cc 4
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
"""
3
The central module containing all code dealing with importing CH4 production data
4
"""
5
from pathlib import Path
6
from urllib.request import urlretrieve
7
import ast
8
9
import geopandas as gpd
10
import numpy as np
11
import pandas as pd
12
13
from egon.data import config, db
14
from egon.data.config import settings
15
from egon.data.datasets import Dataset
16
from egon.data.datasets.scenario_parameters import get_sector_parameters
17
18
19
class CH4Production(Dataset):
20
    def __init__(self, dependencies):
21
        super().__init__(
22
            name="CH4Production",
23
            version="0.0.8",
24
            dependencies=dependencies,
25
            tasks=(download_biogas_data, insert_ch4_generators),
26
        )
27
28
29
def load_NG_generators(scn_name="eGon2035"):
30
    """Define the natural CH4 production units in Germany
31
32
    Parameters
33
    ----------
34
    scn_name : str
35
        Name of the scenario.
36
    Returns
37
    -------
38
    CH4_generators_list :
39
        Dataframe containing the natural gas production units in Germany
40
41
    """
42
    # read carrier information from scnario parameter data
43
    scn_params = get_sector_parameters("gas", scn_name)
44
45
    target_file = (
46
        Path(".")
47
        / "datasets"
48
        / "gas_data"
49
        / "data"
50
        / "IGGIELGN_Productions.csv"
51
    )
52
53
    NG_generators_list = pd.read_csv(
54
        target_file,
55
        delimiter=";",
56
        decimal=".",
57
        usecols=["lat", "long", "country_code", "param"],
58
    )
59
60
    NG_generators_list = NG_generators_list[
61
        NG_generators_list["country_code"].str.match("DE")
62
    ]
63
64
    # Cut data to federal state if in testmode
65
    NUTS1 = []
66
    for index, row in NG_generators_list.iterrows():
67
        param = ast.literal_eval(row["param"])
68
        NUTS1.append(param["nuts_id_1"])
69
    NG_generators_list = NG_generators_list.assign(NUTS1=NUTS1)
70
71
    boundary = settings()["egon-data"]["--dataset-boundary"]
72
    if boundary != "Everything":
73
        map_states = {
74
            "Baden-Württemberg": "DE1",
75
            "Nordrhein-Westfalen": "DEA",
76
            "Hessen": "DE7",
77
            "Brandenburg": "DE4",
78
            "Bremen": "DE5",
79
            "Rheinland-Pfalz": "DEB",
80
            "Sachsen-Anhalt": "DEE",
81
            "Schleswig-Holstein": "DEF",
82
            "Mecklenburg-Vorpommern": "DE8",
83
            "Thüringen": "DEG",
84
            "Niedersachsen": "DE9",
85
            "Sachsen": "DED",
86
            "Hamburg": "DE6",
87
            "Saarland": "DEC",
88
            "Berlin": "DE3",
89
            "Bayern": "DE2",
90
        }
91
92
        NG_generators_list = NG_generators_list[
93
            NG_generators_list["NUTS1"].isin([map_states[boundary], np.nan])
94
        ]
95
96
    NG_generators_list = NG_generators_list.rename(
97
        columns={"lat": "y", "long": "x"}
98
    )
99
    NG_generators_list = gpd.GeoDataFrame(
100
        NG_generators_list,
101
        geometry=gpd.points_from_xy(
102
            NG_generators_list["x"], NG_generators_list["y"]
103
        ),
104
    )
105
    NG_generators_list = NG_generators_list.rename(
106
        columns={"geometry": "geom"}
107
    ).set_geometry("geom", crs=4326)
108
109
    # Insert p_nom
110
    p_nom = []
111
    for index, row in NG_generators_list.iterrows():
112
        param = ast.literal_eval(row["param"])
113
        p_nom.append(param["max_supply_M_m3_per_d"])
114
115
    conversion_factor = 437.5  # MCM/day to MWh/h
116
    NG_generators_list["p_nom"] = [i * conversion_factor for i in p_nom]
117
118
    # Add missing columns
119
    NG_generators_list["marginal_cost"] = scn_params["marginal_cost"]["CH4"]
120
121
    # Remove useless columns
122
    NG_generators_list = NG_generators_list.drop(
123
        columns=["x", "y", "param", "country_code", "NUTS1"]
124
    )
125
126
    return NG_generators_list
127
128
129
def download_biogas_data():
130
    """Download the biogas production units data in Germany
131
132
    Parameters
133
    ----------
134
    None
135
136
    Returns
137
    -------
138
    None
139
140
    """
141
    basename = "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx"
142
    url = (
143
        "https://www.biogaspartner.de/fileadmin/Biogaspartner/Dokumente/Einspeiseatlas/"
144
        + basename
145
    )
146
    target_file = Path(".") / "datasets" / "gas_data" / basename
147
148
    urlretrieve(url, target_file)
149
150
151
def load_biogas_generators(scn_name):
152
    """Define the biogas production units in Germany
153
154
    Parameters
155
    ----------
156
    scn_name : str
157
        Name of the scenario.
158
159
    Returns
160
    -------
161
    CH4_generators_list :
162
        Dataframe containing the biogas production units in Germany
163
164
    """
165
    # read carrier information from scnario parameter data
166
    scn_params = get_sector_parameters("gas", scn_name)
167
168
    basename = "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx"
169
    target_file = Path(".") / "datasets" / "gas_data" / basename
170
171
    # Read-in data from csv-file
172
    biogas_generators_list = pd.read_excel(
173
        target_file,
174
        usecols=["Koordinaten", "Einspeisung Biomethan [(N*m^3)/h)]"],
175
    )
176
177
    x = []
178
    y = []
179
    for index, row in biogas_generators_list.iterrows():
180
        coordinates = row["Koordinaten"].split(",")
181
        y.append(coordinates[0])
182
        x.append(coordinates[1])
183
    biogas_generators_list["x"] = x
184
    biogas_generators_list["y"] = y
185
186
    biogas_generators_list = gpd.GeoDataFrame(
187
        biogas_generators_list,
188
        geometry=gpd.points_from_xy(
189
            biogas_generators_list["x"], biogas_generators_list["y"]
190
        ),
191
    )
192
    biogas_generators_list = biogas_generators_list.rename(
193
        columns={"geometry": "geom"}
194
    ).set_geometry("geom", crs=4326)
195
196
    # Connect to local database
197
    engine = db.engine()
198
199
    # Cut data to federal state if in testmode
200
    boundary = settings()["egon-data"]["--dataset-boundary"]
201
    if boundary != "Everything":
202
        db.execute_sql(
203
            """
204
              DROP TABLE IF EXISTS grid.egon_biogas_generator CASCADE;
205
            """
206
        )
207
        biogas_generators_list.to_postgis(
208
            "egon_biogas_generator",
209
            engine,
210
            schema="grid",
211
            index=False,
212
            if_exists="replace",
213
        )
214
215
        sql = """SELECT *
216
            FROM grid.egon_biogas_generator, boundaries.vg250_sta_union as vg
217
            WHERE ST_Transform(vg.geometry,4326) && egon_biogas_generator.geom
218
            AND ST_Contains(ST_Transform(vg.geometry,4326), egon_biogas_generator.geom)"""
219
220
        biogas_generators_list = gpd.GeoDataFrame.from_postgis(
221
            sql, con=engine, geom_col="geom", crs=4326
222
        )
223
        biogas_generators_list = biogas_generators_list.drop(
224
            columns=["id", "bez", "area_ha", "geometry"]
225
        )
226
        db.execute_sql(
227
            """
228
              DROP TABLE IF EXISTS grid.egon_biogas_generator CASCADE;
229
            """
230
        )
231
232
    # Insert p_nom
233
    conversion_factor = 0.01083  # m^3/h to MWh/h
234
    biogas_generators_list["p_nom"] = [
235
        i * conversion_factor
236
        for i in biogas_generators_list["Einspeisung Biomethan [(N*m^3)/h)]"]
237
    ]
238
239
    # Add missing columns
240
    biogas_generators_list["marginal_cost"] = scn_params["marginal_cost"][
241
        "biogas"
242
    ]
243
244
    # Remove useless columns
245
    biogas_generators_list = biogas_generators_list.drop(
246
        columns=["x", "y", "Koordinaten", "Einspeisung Biomethan [(N*m^3)/h)]"]
247
    )
248
    return biogas_generators_list
249
250
251
def import_gas_generators(scn_name):
252
    """Insert list of gas production units in database
253
254
    Parameters
255
    ----------
256
    scn_name : str
257
        Name of the scenario.
258
    """
259
    carrier = "CH4"
260
261
    # Connect to local database
262
    engine = db.engine()
263
264
    # Select source and target from dataset configuration
265
    source = config.datasets()["gas_prod"]["source"]
266
    target = config.datasets()["gas_prod"]["target"]
267
268
    # Clean table
269
    db.execute_sql(
270
        f"""
271
        DELETE FROM {target['stores']['schema']}.{target['stores']['table']}
272
        WHERE "carrier" = '{carrier}' AND
273
        scn_name = '{scn_name}' AND bus not IN (
274
            SELECT bus_id FROM {source['buses']['schema']}.{source['buses']['table']}
275
            WHERE scn_name = '{scn_name}' AND country != 'DE'
276
        );
277
        """
278
    )
279
280
    if scn_name == "eGon2035":
281
        CH4_generators_list = pd.concat(
282
            [load_NG_generators(scn_name), load_biogas_generators(scn_name)]
283
        )
284
285
    elif scn_name == "eGon100RE":
286
        CH4_generators_list = load_biogas_generators(scn_name)
287
288
    # Add missing columns
289
    c = {"scn_name": scn_name, "carrier": carrier}
290
    CH4_generators_list = CH4_generators_list.assign(**c)
0 ignored issues
show
introduced by
The variable CH4_generators_list does not seem to be defined for all execution paths.
Loading history...
291
292
    # Match to associated CH4 bus
293
    CH4_generators_list = db.assign_gas_bus_id(
294
        CH4_generators_list, scn_name, carrier
295
    )
296
297
    # Remove useless columns
298
    CH4_generators_list = CH4_generators_list.drop(columns=["geom", "bus_id"])
299
300
    # Aggregate ch4 productions with same properties at the same bus
301
    CH4_generators_list = (
302
        CH4_generators_list.groupby(
303
            ["bus", "carrier", "scn_name", "marginal_cost"]
304
        )
305
        .agg({"p_nom": "sum"})
306
        .reset_index(drop=False)
307
    )
308
309
    new_id = db.next_etrago_id("generator")
310
    CH4_generators_list["generator_id"] = range(
311
        new_id, new_id + len(CH4_generators_list)
312
    )
313
314
    # Insert data to db
315
    CH4_generators_list.to_sql(
316
        target["stores"]["table"],
317
        engine,
318
        schema=target["stores"]["schema"],
319
        index=False,
320
        if_exists="append",
321
    )
322
323
324
def insert_ch4_generators():
325
    """Insert gas production units in database for both scenarios
326
327
    Parameters
328
    ----------
329
    None
330
331
    Returns
332
    -------
333
    None
334
    """
335
    import_gas_generators("eGon2035")
336
    import_gas_generators("eGon100RE")
337