Passed
Pull Request — dev (#1375)
by
unknown
02:18
created

GasGeneratorsCapacity.evaluate_df()   B

Complexity

Conditions 5

Size

Total Lines 93
Code Lines 61

Duplication

Lines 93
Ratio 100 %

Importance

Changes 0
Metric Value
eloc 61
dl 93
loc 93
rs 7.8096
c 0
b 0
f 0
cc 5
nop 3

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method: move a cohesive part of the long method into its own, well-named method.

1
"""
2
Sanity check validation rules for gas loads and generators.
3
4
Validates gas demand and generation capacity against reference data.
5
"""
6
7
from pathlib import Path
8
import pandas as pd
9
import ast
10
from egon_validation.rules.base import DataFrameRule, RuleResult, Severity
11
12
13
class GasLoadsCapacity(DataFrameRule):
    """
    Validate gas loads capacity against reference data.

    Compares the total annual load (in TWh) for gas loads in Germany
    from the database against reference data from opendata.ffe.
    This validates that industrial gas demand (CH4 and H2) matches
    expected values from external sources.
    """

    def __init__(self, table: str, rule_id: str, scenario: str = "eGon2035",
                 carrier: str = "CH4_for_industry", rtol: float = 0.10, **kwargs):
        """
        Parameters
        ----------
        table : str
            Target table (grid.egon_etrago_load)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        carrier : str
            Load carrier type ("CH4_for_industry" or "H2_for_industry")
        rtol : float
            Relative tolerance for capacity deviation (default: 0.10 = 10%)
        """
        super().__init__(rule_id=rule_id, table=table, scenario=scenario,
                         carrier=carrier, rtol=rtol, **kwargs)
        self.kind = "sanity"
        self.scenario = scenario
        self.carrier = carrier

    def get_query(self, ctx):
        """
        Query to get total annual load for gas loads in Germany.

        Returns a query that sums the annual load from timeseries data
        for the specified carrier in Germany, converting to TWh.

        NOTE(review): ``self.scenario`` and ``self.carrier`` are interpolated
        verbatim into the SQL text; this assumes they come from trusted rule
        configuration, not from user input -- confirm against the callers.
        """
        return f"""
        SELECT (SUM(
            (SELECT SUM(p)
            FROM UNNEST(b.p_set) p))/1000000)::numeric as load_twh
        FROM grid.egon_etrago_load a
        JOIN grid.egon_etrago_load_timeseries b
        ON (a.load_id = b.load_id)
        JOIN grid.egon_etrago_bus c
        ON (a.bus=c.bus_id)
        WHERE b.scn_name = '{self.scenario}'
        AND a.scn_name = '{self.scenario}'
        AND c.scn_name = '{self.scenario}'
        AND c.country = 'DE'
        AND a.carrier = '{self.carrier}'
        """

    def _get_reference_capacity(self):
        """
        Calculate reference load capacity from opendata.ffe data.

        Returns
        -------
        float
            Expected total annual load in TWh

        Raises
        ------
        ValueError
            If the reference files cannot be read or processed; the
            original exception is attached as the cause.
        """
        try:
            path = Path(".") / "datasets" / "gas_data" / "demand"

            # Region correlation file: maps id_region -> short region name
            df_corr = pd.read_json(path / "region_corr.json")
            df_corr = df_corr.loc[:, ["id_region", "name_short"]]
            df_corr = df_corr.set_index("id_region")

            # Demand data for the configured carrier and scenario
            input_gas_demand = pd.read_json(
                path / (self.carrier + f"_{self.scenario}.json")
            )
            input_gas_demand = input_gas_demand.loc[:, ["id_region", "value"]]
            input_gas_demand = input_gas_demand.set_index("id_region")

            # Keep only regions present in both tables
            input_gas_demand = pd.concat(
                [input_gas_demand, df_corr], axis=1, join="inner"
            )

            # German regions are those whose short name starts with "DE"
            in_germany = input_gas_demand["name_short"].str.startswith("DE")
            input_gas_demand = input_gas_demand[in_germany]

            # Sum and convert to TWh; skipna=False keeps a missing value
            # visible as NaN instead of silently dropping it from the total
            total_demand = input_gas_demand["value"].sum(skipna=False) / 1000000

            return float(total_demand)

        except Exception as e:
            raise ValueError(
                f"Error reading reference load data: {str(e)}"
            ) from e

    def _sanity_result(self, *, success, message, severity, **extra):
        """
        Build a RuleResult carrying this rule's identifying fields.

        Parameters
        ----------
        success : bool
            Outcome of the check
        message : str
            Human-readable description of the outcome
        severity : Severity
            Severity attached to the result
        **extra
            Additional RuleResult fields (e.g. ``observed``, ``expected``);
            only passed through when given.
        """
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra,
        )

    def evaluate_df(self, df, ctx):
        """
        Evaluate gas loads capacity against reference data.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with load_twh column
        ctx : dict
            Context information

        Returns
        -------
        RuleResult
            Validation result with success/failure status
        """
        if df.empty or df["load_twh"].isna().all():
            return self._sanity_result(
                success=False,
                message=f"No {self.carrier} loads found for scenario {self.scenario}",
                severity=Severity.WARNING,
            )

        observed_load = float(df["load_twh"].values[0])

        # Get expected capacity from reference data
        try:
            expected_load = self._get_reference_capacity()
        except Exception as e:
            return self._sanity_result(
                success=False,
                message=str(e),
                severity=Severity.ERROR,
            )

        # A zero reference makes the relative deviation undefined; report it
        # as a failed check instead of raising ZeroDivisionError.
        if expected_load == 0:
            return self._sanity_result(
                success=False,
                observed=observed_load,
                expected=expected_load,
                message=(
                    f"Reference load for {self.carrier} in {self.scenario} "
                    f"is zero; relative deviation is undefined"
                ),
                severity=Severity.ERROR,
            )

        # Calculate relative deviation (magnitude of the reference in the
        # denominator so the result is never negative)
        rtol = self.params.get("rtol", 0.10)
        deviation = abs(observed_load - expected_load) / abs(expected_load)
        deviation_pct = deviation * 100

        if deviation <= rtol:
            return self._sanity_result(
                success=True,
                observed=observed_load,
                expected=expected_load,
                message=(
                    f"{self.carrier} load valid for {self.scenario}: "
                    f"{observed_load:.2f} TWh (deviation: {deviation_pct:.2f}%, "
                    f"tolerance: {rtol*100:.2f}%)"
                ),
                severity=Severity.INFO,
            )

        return self._sanity_result(
            success=False,
            observed=observed_load,
            expected=expected_load,
            message=(
                f"{self.carrier} load deviation too large for {self.scenario}: "
                f"{observed_load:.2f} vs {expected_load:.2f} TWh expected "
                f"(deviation: {deviation_pct:.2f}%, tolerance: {rtol*100:.2f}%)"
            ),
            severity=Severity.ERROR,
        )
204
205
206
class GasGeneratorsCapacity(DataFrameRule):
    """
    Validate gas generators capacity against reference data.

    Compares the total nominal power (p_nom) of CH4 generators in Germany
    from the database against reference data from SciGRID_gas productions
    and the Biogaspartner Einspeiseatlas.
    """

    def __init__(self, table: str, rule_id: str, scenario: str = "eGon2035",
                 carrier: str = "CH4", rtol: float = 0.10, **kwargs):
        """
        Parameters
        ----------
        table : str
            Target table (grid.egon_etrago_generator)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        carrier : str
            Generator carrier type (default: "CH4")
        rtol : float
            Relative tolerance for capacity deviation (default: 0.10 = 10%)
        """
        super().__init__(rule_id=rule_id, table=table, scenario=scenario,
                         carrier=carrier, rtol=rtol, **kwargs)
        self.kind = "sanity"
        self.scenario = scenario
        self.carrier = carrier

    def get_query(self, ctx):
        """
        Query to get total generator capacity in Germany.

        Returns a query that sums the p_nom of all gas generators
        in Germany for the specified carrier.

        NOTE(review): ``self.scenario`` and ``self.carrier`` are interpolated
        verbatim into the SQL text; this assumes they come from trusted rule
        configuration, not from user input -- confirm against the callers.
        """
        return f"""
        SELECT SUM(p_nom::numeric) as p_nom_germany
        FROM grid.egon_etrago_generator
        WHERE scn_name = '{self.scenario}'
        AND carrier = '{self.carrier}'
        AND bus IN (
            SELECT bus_id
            FROM grid.egon_etrago_bus
            WHERE scn_name = '{self.scenario}'
            AND country = 'DE'
            AND carrier = '{self.carrier}'
        )
        """

    def _get_reference_capacity(self):
        """
        Calculate reference generation capacity from SciGRID_gas + biogas data.

        Returns
        -------
        float
            Expected total generation capacity in MW

        Raises
        ------
        ValueError
            If the reference files cannot be read or processed; the
            original exception is attached as the cause.
        """
        try:
            # Read SciGRID_gas natural gas productions
            target_file = (
                Path(".")
                / "datasets"
                / "gas_data"
                / "data"
                / "IGGIELGN_Productions.csv"
            )

            ng_generators = pd.read_csv(
                target_file,
                delimiter=";",
                decimal=".",
                usecols=["country_code", "param"],
            )

            ng_generators = ng_generators[
                ng_generators["country_code"].str.startswith("DE")
            ]

            # Each "param" cell holds a Python-literal dict; literal_eval
            # parses it without executing arbitrary code.
            p_ng = sum(
                ast.literal_eval(raw_param)["max_supply_M_m3_per_d"]
                for raw_param in ng_generators["param"]
            )

            conversion_factor = 437.5  # MCM/day to MWh/h
            p_ng = p_ng * conversion_factor

            # Read biogas production data
            basename = "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx"
            target_file = (
                Path(".") / "data_bundle_egon_data" / "gas_data" / basename
            )

            feed_in_column = "Einspeisung Biomethan [(N*m^3)/h)]"
            conversion_factor_b = 0.01083  # m^3/h to MWh/h
            p_biogas = (
                pd.read_excel(target_file, usecols=[feed_in_column])[
                    feed_in_column
                ].sum()
                * conversion_factor_b
            )

            total_generation = p_ng + p_biogas

            return float(total_generation)

        except Exception as e:
            raise ValueError(
                f"Error reading reference generation data: {str(e)}"
            ) from e

    def _sanity_result(self, *, success, message, severity, **extra):
        """
        Build a RuleResult carrying this rule's identifying fields.

        Parameters
        ----------
        success : bool
            Outcome of the check
        message : str
            Human-readable description of the outcome
        severity : Severity
            Severity attached to the result
        **extra
            Additional RuleResult fields (e.g. ``observed``, ``expected``);
            only passed through when given.
        """
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra,
        )

    def evaluate_df(self, df, ctx):
        """
        Evaluate gas generators capacity against reference data.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with p_nom_germany column
        ctx : dict
            Context information

        Returns
        -------
        RuleResult
            Validation result with success/failure status
        """
        if df.empty or df["p_nom_germany"].isna().all():
            return self._sanity_result(
                success=False,
                message=f"No {self.carrier} generators found for scenario {self.scenario}",
                severity=Severity.WARNING,
            )

        observed_capacity = float(df["p_nom_germany"].values[0])

        # Get expected capacity from reference data
        try:
            expected_capacity = self._get_reference_capacity()
        except Exception as e:
            return self._sanity_result(
                success=False,
                message=str(e),
                severity=Severity.ERROR,
            )

        # A zero reference makes the relative deviation undefined; report it
        # as a failed check instead of raising ZeroDivisionError.
        if expected_capacity == 0:
            return self._sanity_result(
                success=False,
                observed=observed_capacity,
                expected=expected_capacity,
                message=(
                    f"Reference generation capacity for {self.carrier} in "
                    f"{self.scenario} is zero; relative deviation is undefined"
                ),
                severity=Severity.ERROR,
            )

        # Calculate relative deviation (magnitude of the reference in the
        # denominator so the result is never negative)
        rtol = self.params.get("rtol", 0.10)
        deviation = (
            abs(observed_capacity - expected_capacity) / abs(expected_capacity)
        )
        deviation_pct = deviation * 100

        if deviation <= rtol:
            return self._sanity_result(
                success=True,
                observed=observed_capacity,
                expected=expected_capacity,
                message=(
                    f"{self.carrier} generator capacity valid for {self.scenario}: "
                    f"{observed_capacity:.2f} MW (deviation: {deviation_pct:.2f}%, "
                    f"tolerance: {rtol*100:.2f}%)"
                ),
                severity=Severity.INFO,
            )

        return self._sanity_result(
            success=False,
            observed=observed_capacity,
            expected=expected_capacity,
            message=(
                f"{self.carrier} generator capacity deviation too large for {self.scenario}: "
                f"{observed_capacity:.2f} vs {expected_capacity:.2f} MW expected "
                f"(deviation: {deviation_pct:.2f}%, tolerance: {rtol*100:.2f}%)"
            ),
            severity=Severity.ERROR,
        )
413