Passed
Pull Request — dev (#840)
by
unknown
01:36
created

data.datasets.ch4_prod.load_NG_generators()   B

Complexity

Conditions 4

Size

Total Lines 98
Code Lines 60

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 60
dl 0
loc 98
rs 8.309
c 0
b 0
f 0
cc 4
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
"""
3
The central module containing all code dealing with importing CH4 production data
4
"""
5
from pathlib import Path
6
from urllib.request import urlretrieve
7
import ast
8
9
import geopandas as gpd
10
import numpy as np
11
import pandas as pd
12
13
from egon.data import config, db
14
from egon.data.config import settings
15
from egon.data.datasets import Dataset
16
from egon.data.datasets.scenario_parameters import get_sector_parameters
17
18
19
class CH4Production(Dataset):
    """Dataset that registers the import of the CH4 production units.

    Its only task is ``import_gas_generators``.
    """

    def __init__(self, dependencies):
        # Parentheses around a single name do not build a tuple, so the
        # task is handed over as the bare callable.
        dataset_options = {
            "name": "CH4Production",
            "version": "0.0.7",
            "dependencies": dependencies,
            "tasks": import_gas_generators,
        }
        super().__init__(**dataset_options)
27
28
29
# NUTS-1 code of each German federal state, keyed by the state name that the
# "--dataset-boundary" setting is looked up with.
_NUTS1_BY_FEDERAL_STATE = {
    "Baden-Württemberg": "DE1",
    "Nordrhein-Westfalen": "DEA",
    "Hessen": "DE7",
    "Brandenburg": "DE4",
    "Bremen": "DE5",
    "Rheinland-Pfalz": "DEB",
    "Sachsen-Anhalt": "DEE",
    "Schleswig-Holstein": "DEF",
    "Mecklenburg-Vorpommern": "DE8",
    "Thüringen": "DEG",
    "Niedersachsen": "DE9",
    "Sachsen": "DED",
    "Hamburg": "DE6",
    "Saarland": "DEC",
    "Berlin": "DE3",
    "Bayern": "DE2",
}


def load_NG_generators(scn_name):
    """Define the natural CH4 production units in Germany

    Parameters
    ----------
    scn_name : str
        Name of the scenario.

    Returns
    -------
    NG_generators_list : geopandas.GeoDataFrame
        Natural gas production units in Germany with the columns
        ``geom`` (point geometry, CRS 4326), ``p_nom`` and
        ``marginal_cost``.

    Raises
    ------
    KeyError
        If the dataset boundary is neither "Everything" nor one of the
        known federal state names.

    """
    # Read carrier information from scenario parameter data
    scn_params = get_sector_parameters("gas", scn_name)

    target_file = (
        Path(".")
        / "datasets"
        / "gas_data"
        / "data"
        / "IGGIELGN_Productions.csv"
    )

    NG_generators_list = pd.read_csv(
        target_file,
        delimiter=";",
        decimal=".",
        usecols=["lat", "long", "country_code", "param"],
    )

    NG_generators_list = NG_generators_list[
        NG_generators_list["country_code"].str.match("DE")
    ]

    # Each "param" cell holds a Python dict literal. Parse it once and keep
    # the dicts in the column so that both the NUTS-1 code and the capacity
    # can be read from them without evaluating the text a second time.
    parsed_params = NG_generators_list["param"].map(ast.literal_eval)
    NG_generators_list = NG_generators_list.assign(
        param=parsed_params,
        NUTS1=[param["nuts_id_1"] for param in parsed_params],
    )

    # Cut data to federal state if in testmode; rows without a NUTS-1 code
    # (NaN) are kept as well
    boundary = settings()["egon-data"]["--dataset-boundary"]
    if boundary != "Everything":
        NG_generators_list = NG_generators_list[
            NG_generators_list["NUTS1"].isin(
                [_NUTS1_BY_FEDERAL_STATE[boundary], np.nan]
            )
        ]

    NG_generators_list = NG_generators_list.rename(
        columns={"lat": "y", "long": "x"}
    )
    NG_generators_list = gpd.GeoDataFrame(
        NG_generators_list,
        geometry=gpd.points_from_xy(
            NG_generators_list["x"], NG_generators_list["y"]
        ),
    )
    NG_generators_list = NG_generators_list.rename(
        columns={"geometry": "geom"}
    ).set_geometry("geom", crs=4326)

    # Insert p_nom
    conversion_factor = 437.5  # MCM/day to MWh/h
    NG_generators_list["p_nom"] = [
        param["max_supply_M_m3_per_d"] * conversion_factor
        for param in NG_generators_list["param"]
    ]

    # Add missing columns
    NG_generators_list["marginal_cost"] = scn_params["marginal_cost"]["CH4"]

    # Remove the helper columns that are not part of the result
    NG_generators_list = NG_generators_list.drop(
        columns=["x", "y", "param", "country_code", "NUTS1"]
    )

    return NG_generators_list
127
128
129
def load_biogas_generators(scn_name):
    """Define the biogas production units in Germany

    The source workbook is downloaded on every call. If the dataset
    boundary is not "Everything", the units are written to a temporary
    PostGIS table and reduced to those contained in the geometry of
    ``boundaries.vg250_sta_union``.

    Parameters
    ----------
    scn_name : str
        Name of the scenario.

    Returns
    -------
    biogas_generators_list : geopandas.GeoDataFrame
        Biogas production units in Germany with the columns ``geom``
        (point geometry, CRS 4326), ``p_nom`` and ``marginal_cost``.

    """
    # Read carrier information from scenario parameter data
    scn_params = get_sector_parameters("gas", scn_name)

    # Download file
    basename = "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx"
    url = (
        "https://www.biogaspartner.de/fileadmin/Biogaspartner/Dokumente/Einspeiseatlas/"
        + basename
    )
    target_file = Path(".") / "datasets" / "gas_data" / basename

    urlretrieve(url, target_file)

    # Read-in data from the downloaded Excel file
    feed_in_column = "Einspeisung Biomethan [(N*m^3)/h)]"
    biogas_generators_list = pd.read_excel(
        target_file,
        usecols=["Koordinaten", feed_in_column],
    )

    # "Koordinaten" is split at the commas: the first part is used as y,
    # the second one as x
    coordinates = [
        value.split(",") for value in biogas_generators_list["Koordinaten"]
    ]
    biogas_generators_list["x"] = [parts[1] for parts in coordinates]
    biogas_generators_list["y"] = [parts[0] for parts in coordinates]

    biogas_generators_list = gpd.GeoDataFrame(
        biogas_generators_list,
        geometry=gpd.points_from_xy(
            biogas_generators_list["x"], biogas_generators_list["y"]
        ),
    )
    biogas_generators_list = biogas_generators_list.rename(
        columns={"geometry": "geom"}
    ).set_geometry("geom", crs=4326)

    # Cut data to federal state if in testmode
    boundary = settings()["egon-data"]["--dataset-boundary"]
    if boundary != "Everything":
        # Connect to local database (only needed for the spatial filter)
        engine = db.engine()

        drop_temporary_table = """
              DROP TABLE IF EXISTS grid.egon_biogas_generator CASCADE;
            """
        db.execute_sql(drop_temporary_table)
        try:
            biogas_generators_list.to_postgis(
                "egon_biogas_generator",
                engine,
                schema="grid",
                index=False,
                if_exists="replace",
            )

            sql = """SELECT *
            FROM grid.egon_biogas_generator, boundaries.vg250_sta_union  as vg
            WHERE ST_Transform(vg.geometry,4326) && egon_biogas_generator.geom
            AND ST_Contains(ST_Transform(vg.geometry,4326), egon_biogas_generator.geom)"""

            biogas_generators_list = gpd.GeoDataFrame.from_postgis(
                sql, con=engine, geom_col="geom", crs=4326
            )
        finally:
            # Remove the temporary table even if writing or querying failed
            db.execute_sql(drop_temporary_table)

        # "SELECT *" also returns the columns of the boundary table
        biogas_generators_list = biogas_generators_list.drop(
            columns=["id", "bez", "area_ha", "geometry"]
        )

    # Insert p_nom
    conversion_factor = 0.01083  # m^3/h to MWh/h
    biogas_generators_list["p_nom"] = (
        biogas_generators_list[feed_in_column] * conversion_factor
    )

    # Add missing columns
    biogas_generators_list["marginal_cost"] = scn_params["marginal_cost"][
        "biogas"
    ]

    # Remove the helper columns that are not part of the result
    biogas_generators_list = biogas_generators_list.drop(
        columns=["x", "y", "Koordinaten", feed_in_column]
    )
    return biogas_generators_list
233
234
235
def import_gas_generators(scn_name="eGon2035"):
    """Insert list of gas production units in database

    The natural gas and biogas production units are combined, matched to
    their CH4 bus, summed per bus and appended to the target table.

    Parameters
    ----------
    scn_name : str
        Name of the scenario.

    """
    # Connect to local database
    db_engine = db.engine()

    # Select source and target from dataset configuration
    gas_prod_source = config.datasets()["gas_prod"]["source"]
    gas_prod_target = config.datasets()["gas_prod"]["target"]
    bus_table = gas_prod_source["buses"]
    store_table = gas_prod_target["stores"]

    # Clean table: delete the CH4 rows of this scenario whose bus is not
    # among the buses with a country other than 'DE'.
    # NOTE(review): the statement is built by string interpolation, so
    # scn_name and the configured names must come from trusted input.
    db.execute_sql(
        f"""
        DELETE FROM {store_table['schema']}.{store_table['table']}
        WHERE "carrier" = 'CH4' AND
        scn_name = '{scn_name}' AND bus not IN (
            SELECT bus_id FROM {bus_table['schema']}.{bus_table['table']}
            WHERE scn_name = '{scn_name}' AND country != 'DE'
        );
        """
    )

    natural_gas_units = load_NG_generators(scn_name)
    biogas_units = load_biogas_generators(scn_name)
    generators = pd.concat([natural_gas_units, biogas_units])

    # Add missing columns
    generators = generators.assign(scn_name=scn_name, carrier="CH4")

    # Match to associated CH4 bus
    generators = db.assign_gas_bus_id(generators, scn_name, "CH4")

    # Remove useless columns
    generators = generators.drop(columns=["geom", "bus_id"])

    # Aggregate ch4 productions with same properties at the same bus
    grouping_keys = ["bus", "carrier", "scn_name", "marginal_cost"]
    summed_capacity = generators.groupby(grouping_keys).agg({"p_nom": "sum"})
    generators = summed_capacity.reset_index(drop=False)

    # Number the aggregated generators consecutively from the next free id
    first_id = db.next_etrago_id("generator")
    generators["generator_id"] = range(first_id, first_id + len(generators))

    # Insert data to db
    generators.to_sql(
        store_table["table"],
        db_engine,
        schema=store_table["schema"],
        index=False,
        if_exists="append",
    )
300