Passed
Pull Request — dev (#1375)
by
unknown
02:18
created

custom.sanity.electricity_capacity   A

Complexity

Total Complexity 13

Size/Duplication

Total Lines 274
Duplicated Lines 18.98 %

Importance

Changes 0
Metric Value
wmc 13
eloc 131
dl 52
loc 274
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A ElectricityCapacityComparison.get_query() 0 43 3
A ElectricityCapacityComparison.__init__() 0 46 1
D ElectricityCapacityComparison.evaluate_df() 52 138 9

How to fix   Duplicated Code   

Duplicated Code

Duplicated code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
"""
2
Sanity check validation rules for electricity capacity comparison.
3
4
Validates that distributed capacities in etrago tables match input capacities
5
from scenario_capacities table.
6
"""
7
8
from egon_validation.rules.base import DataFrameRule, RuleResult, Severity
9
from typing import Optional, List
10
11
12
class ElectricityCapacityComparison(DataFrameRule):
    """
    Compare distributed capacity with input capacity for electricity components.

    Compares the total capacity in etrago tables (grid.egon_etrago_generator,
    grid.egon_etrago_storage) against the input capacity from the scenario
    capacities table (supply.egon_scenario_capacities).

    This validation ensures that capacity distribution is correct and no
    capacity is lost or incorrectly added during the distribution process.
    """

    def __init__(
        self,
        table: str,
        rule_id: str,
        scenario: str = "eGon2035",
        carrier: str = "wind_onshore",
        component_type: str = "generator",
        output_carriers: Optional[List[str]] = None,
        rtol: float = 0.10,
        **kwargs
    ):
        """
        Parameters
        ----------
        table : str
            Target table whose ``p_nom`` column is summed
            (e.g. grid.egon_etrago_generator or grid.egon_etrago_storage)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        carrier : str
            Carrier type for the input table (supply.egon_scenario_capacities)
        component_type : str
            Type of component ("generator", "storage", or "link"). "link"
            filters on ``bus0``/``bus1``; any other value filters on ``bus``.
        output_carriers : List[str], optional
            List of carrier names in output table. If None (or empty), uses
            the carrier parameter. Useful for biomass which maps to multiple
            output carriers.
        rtol : float
            Relative tolerance for capacity deviation (default: 0.10 = 10%)
        **kwargs
            Forwarded unchanged to the base class constructor.
        """
        super().__init__(
            rule_id=rule_id,
            table=table,
            scenario=scenario,
            carrier=carrier,
            component_type=component_type,
            output_carriers=output_carriers,
            rtol=rtol,
            **kwargs
        )
        self.kind = "sanity"
        self.scenario = scenario
        self.carrier = carrier
        self.component_type = component_type
        self.output_carriers = output_carriers or [carrier]
        self.rtol = rtol

    @staticmethod
    def _sql_literal(value) -> str:
        """Render ``value`` as a single-quoted SQL string literal.

        Embedded single quotes are doubled so that a name containing ``'``
        cannot terminate the literal early.
        """
        return "'" + str(value).replace("'", "''") + "'"

    def get_query(self, ctx) -> str:
        """
        Query to compare input and output capacities.

        Returns a query that:
        1. Sums output capacity from etrago table for German buses
        2. Sums input capacity from scenario_capacities table
        3. Returns both values for comparison

        Parameters
        ----------
        ctx
            Context information (not used when building the query).

        Returns
        -------
        str
            SQL text yielding one row with the columns
            ``output_capacity_mw`` and ``input_capacity_mw``.
        """
        scenario = self._sql_literal(self.scenario)

        # Build carrier filter for output table
        carrier_literals = [self._sql_literal(c) for c in self.output_carriers]
        if len(carrier_literals) == 1:
            carrier_filter = f"carrier = {carrier_literals[0]}"
        else:
            # An empty list degrades to IN (''), which matches nothing but
            # remains valid SQL.
            joined = ", ".join(carrier_literals) or "''"
            carrier_filter = f"carrier IN ({joined})"

        # Sub-select of all German buses in the scenario; shared by every
        # bus filter variant below.
        german_buses = f"""
                SELECT bus_id
                FROM grid.egon_etrago_bus
                WHERE scn_name = {scenario}
                AND country = 'DE'
            """

        # Links have bus0 and bus1, generators/storage have bus
        if self.component_type == "link":
            bus_filter = (
                f"AND (bus0 IN ({german_buses}) "
                f"OR bus1 IN ({german_buses}))"
            )
        else:
            bus_filter = f"AND bus IN ({german_buses})"

        return f"""
        WITH output_capacity AS (
            SELECT
                COALESCE(SUM(p_nom::numeric), 0) as output_capacity_mw
            FROM {self.table}
            WHERE scn_name = {scenario}
            AND {carrier_filter}
            {bus_filter}
        ),
        input_capacity AS (
            SELECT
                COALESCE(SUM(capacity::numeric), 0) as input_capacity_mw
            FROM supply.egon_scenario_capacities
            WHERE carrier = {self._sql_literal(self.carrier)}
            AND scenario_name = {scenario}
        )
        SELECT
            o.output_capacity_mw,
            i.input_capacity_mw
        FROM output_capacity o
        CROSS JOIN input_capacity i
        """

    def _result(self, success, severity, message, **extra):
        """Build a RuleResult carrying this rule's identifying fields.

        ``extra`` is passed through as-is (used for ``observed`` and
        ``expected``), so fields a caller omits keep RuleResult's defaults.
        """
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra
        )

    def evaluate_df(self, df, ctx):
        """
        Evaluate capacity comparison.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with output_capacity_mw and input_capacity_mw columns;
            only the first row is read.
        ctx : dict
            Context information (not used by the evaluation).

        Returns
        -------
        RuleResult
            Validation result with success/failure status. Succeeds when both
            capacities are zero, or when the relative deviation of the output
            from the input is within ``rtol``.
        """
        if df.empty:
            return self._result(
                False,
                Severity.ERROR,
                f"No data found for {self.carrier} capacity comparison",
            )

        output_capacity = float(df["output_capacity_mw"].values[0])
        input_capacity = float(df["input_capacity_mw"].values[0])

        # Case 1: Both zero - OK, no capacity needed
        if output_capacity == 0 and input_capacity == 0:
            return self._result(
                True,
                Severity.INFO,
                (
                    f"No {self.carrier} {self.component_type} capacity needed "
                    f"for {self.scenario} (both input and output are zero)"
                ),
                observed=0.0,
                expected=0.0,
            )

        # Case 2: Input > 0 but output = 0 - ERROR
        if input_capacity > 0 and output_capacity == 0:
            return self._result(
                False,
                Severity.ERROR,
                (
                    f"{self.carrier} {self.component_type} capacity was not distributed at all! "
                    f"Input: {input_capacity:.2f} MW, Output: 0 MW for {self.scenario}"
                ),
                observed=0.0,
                expected=input_capacity,
            )

        # Case 3: Output != 0 but input = 0 - ERROR
        # Any non-zero output is caught here (not only positive values) so
        # that the relative deviation below never divides by zero.
        if input_capacity == 0:
            return self._result(
                False,
                Severity.ERROR,
                (
                    f"{self.carrier} {self.component_type} capacity was distributed "
                    f"even though no input was provided! "
                    f"Output: {output_capacity:.2f} MW, Input: 0 MW for {self.scenario}"
                ),
                observed=output_capacity,
                expected=0.0,
            )

        # Case 4: Input is non-zero - Check deviation
        # Normalising by the magnitude of the input keeps the deviation
        # non-negative, so a negative input cannot pass the tolerance check
        # by accident.
        reference = abs(input_capacity)
        deviation = abs(output_capacity - input_capacity) / reference
        error_pct = ((output_capacity - input_capacity) / reference) * 100

        success = deviation <= self.rtol

        if success:
            headline = (
                f"{self.carrier} {self.component_type} capacity valid for {self.scenario}: "
            )
            severity = Severity.INFO
        else:
            headline = (
                f"{self.carrier} {self.component_type} capacity deviation too large for {self.scenario}: "
            )
            severity = Severity.ERROR

        return self._result(
            success,
            severity,
            (
                headline
                + f"Output: {output_capacity:.2f} MW, Input: {input_capacity:.2f} MW, "
                f"Deviation: {error_pct:+.2f}% (tolerance: ±{self.rtol*100:.2f}%)"
            ),
            observed=output_capacity,
            expected=input_capacity,
        )
275