Passed
Pull Request — dev (#1375)
by
unknown
02:18
created

ElectricityCapacityComparison.__init__()   A

Complexity

Conditions 1

Size

Total Lines 46
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 46
rs 9.28
c 0
b 0
f 0
cc 1
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you later need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
Sanity check validation rules for electricity capacity comparison.
3
4
Validates that distributed capacities in etrago tables match input capacities
5
from scenario_capacities table.
6
"""
7
8
from egon_validation.rules.base import DataFrameRule, RuleResult, Severity
9
from typing import Optional, List
10
11
12
class ElectricityCapacityComparison(DataFrameRule):
    """
    Compare distributed capacity with input capacity for electricity components.

    Compares the total capacity in etrago tables (grid.egon_etrago_generator,
    grid.egon_etrago_storage) against the input capacity from the scenario
    capacities table (supply.egon_scenario_capacities).

    This validation ensures that capacity distribution is correct and no
    capacity is lost or incorrectly added during the distribution process.
    """

    def __init__(
        self,
        table: str,
        rule_id: str,
        scenario: str = "eGon2035",
        carrier: str = "wind_onshore",
        component_type: str = "generator",
        output_carriers: Optional[List[str]] = None,
        rtol: float = 0.10,
        **kwargs
    ) -> None:
        """
        Parameters
        ----------
        table : str
            Target table (grid.egon_etrago_generator or grid.egon_etrago_storage)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        carrier : str
            Carrier type for the input table (supply.egon_scenario_capacities)
        component_type : str
            Type of component ("generator", "storage", or "link")
        output_carriers : List[str], optional
            List of carrier names in output table. If None, uses carrier parameter.
            Useful for biomass which maps to multiple output carriers.
        rtol : float
            Relative tolerance for capacity deviation (default: 0.10 = 10%)
        **kwargs
            Forwarded unchanged to the base class constructor.
        """
        super().__init__(
            rule_id=rule_id,
            table=table,
            scenario=scenario,
            carrier=carrier,
            component_type=component_type,
            output_carriers=output_carriers,
            rtol=rtol,
            **kwargs
        )
        self.kind = "sanity"
        self.scenario = scenario
        self.carrier = carrier
        self.component_type = component_type
        # Fall back to the input carrier when no explicit output mapping is given.
        self.output_carriers = output_carriers or [carrier]
        self.rtol = rtol

    def get_query(self, ctx):
        """
        Query to compare input and output capacities.

        Returns a query that:
        1. Sums output capacity from etrago table for German buses
        2. Sums input capacity from scenario_capacities table
        3. Returns both values for comparison

        Parameters
        ----------
        ctx
            Context object; not used when building the query.

        Returns
        -------
        str
            SQL text yielding one row with the columns ``output_capacity_mw``
            and ``input_capacity_mw`` (both COALESCEd to 0 when nothing matches).

        Notes
        -----
        Table, scenario and carrier names are interpolated directly into the
        SQL text, so they must come from trusted rule configuration and never
        from user-supplied input.
        """
        # Build carrier filter for output table
        if len(self.output_carriers) == 1:
            carrier_filter = f"carrier = '{self.output_carriers[0]}'"
        else:
            carriers_str = "', '".join(self.output_carriers)
            carrier_filter = f"carrier IN ('{carriers_str}')"

        # Build bus filter based on component type
        # Links have bus0 and bus1, generators/storage have bus
        if self.component_type == "link":
            bus_filter = f"""
            AND (bus0 IN (
                SELECT bus_id
                FROM grid.egon_etrago_bus
                WHERE scn_name = '{self.scenario}'
                AND country = 'DE'
            ) OR bus1 IN (
                SELECT bus_id
                FROM grid.egon_etrago_bus
                WHERE scn_name = '{self.scenario}'
                AND country = 'DE'
            ))
            """
        else:
            bus_filter = f"""
            AND bus IN (
                SELECT bus_id
                FROM grid.egon_etrago_bus
                WHERE scn_name = '{self.scenario}'
                AND country = 'DE'
            )
            """

        return f"""
        WITH output_capacity AS (
            SELECT
                COALESCE(SUM(p_nom::numeric), 0) as output_capacity_mw
            FROM {self.table}
            WHERE scn_name = '{self.scenario}'
            AND {carrier_filter}
            {bus_filter}
        ),
        input_capacity AS (
            SELECT
                COALESCE(SUM(capacity::numeric), 0) as input_capacity_mw
            FROM supply.egon_scenario_capacities
            WHERE carrier = '{self.carrier}'
            AND scenario_name = '{self.scenario}'
        )
        SELECT
            o.output_capacity_mw,
            i.input_capacity_mw
        FROM output_capacity o
        CROSS JOIN input_capacity i
        """

    def _capacity_result(self, success, severity, message, **extra):
        """Build a RuleResult carrying this rule's identifying fields.

        ``extra`` is forwarded as-is (used for ``observed`` / ``expected``), so
        fields a caller omits keep whatever default RuleResult defines.
        """
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra
        )

    def evaluate_df(self, df, ctx):
        """
        Evaluate capacity comparison.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with output_capacity_mw and input_capacity_mw columns
        ctx : dict
            Context information (not used by this evaluation)

        Returns
        -------
        RuleResult
            Validation result with success/failure status
        """
        if df.empty:
            return self._capacity_result(
                success=False,
                severity=Severity.ERROR,
                message=f"No data found for {self.carrier} capacity comparison",
            )

        # Only the first row is read; both columns are converted to float.
        output_capacity = float(df["output_capacity_mw"].values[0])
        input_capacity = float(df["input_capacity_mw"].values[0])

        # Case 1: Both zero - OK, no capacity needed
        if output_capacity == 0 and input_capacity == 0:
            return self._capacity_result(
                success=True,
                severity=Severity.INFO,
                observed=0.0,
                expected=0.0,
                message=(
                    f"No {self.carrier} {self.component_type} capacity needed "
                    f"for {self.scenario} (both input and output are zero)"
                ),
            )

        # Case 2: Input > 0 but output = 0 - ERROR
        if input_capacity > 0 and output_capacity == 0:
            return self._capacity_result(
                success=False,
                severity=Severity.ERROR,
                observed=0.0,
                expected=input_capacity,
                message=(
                    f"{self.carrier} {self.component_type} capacity was not distributed at all! "
                    f"Input: {input_capacity:.2f} MW, Output: 0 MW for {self.scenario}"
                ),
            )

        # Case 3: Output != 0 but input = 0 - ERROR
        # Testing only the input here (both-zero was handled above) also covers
        # a negative output, which would otherwise reach the division below
        # with a zero denominator.
        if input_capacity == 0:
            return self._capacity_result(
                success=False,
                severity=Severity.ERROR,
                observed=output_capacity,
                expected=0.0,
                message=(
                    f"{self.carrier} {self.component_type} capacity was distributed "
                    f"even though no input was provided! "
                    f"Output: {output_capacity:.2f} MW, Input: 0 MW for {self.scenario}"
                ),
            )

        # Case 4: Input is non-zero - Check deviation
        # The denominator is the magnitude of the input so that a negative
        # input cannot produce a negative deviation that trivially passes.
        signed_deviation = (output_capacity - input_capacity) / abs(input_capacity)
        error_pct = signed_deviation * 100
        success = abs(signed_deviation) <= self.rtol

        if success:
            headline = "capacity valid for"
            severity = Severity.INFO
        else:
            headline = "capacity deviation too large for"
            severity = Severity.ERROR

        return self._capacity_result(
            success=success,
            severity=severity,
            observed=output_capacity,
            expected=input_capacity,
            message=(
                f"{self.carrier} {self.component_type} {headline} {self.scenario}: "
                f"Output: {output_capacity:.2f} MW, Input: {input_capacity:.2f} MW, "
                f"Deviation: {error_pct:+.2f}% (tolerance: ±{self.rtol*100:.2f}%)"
            ),
        )
275