custom.sanity.gas_stores.H2SaltcavernStoresCapacity.evaluate_df() - Code Metrics - Inspection of "feature/validation_integration" - openego/eGon-data - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — dev (#1375)

unknown

created 2026-01-19 10:52 UTC

H2SaltcavernStoresCapacity.evaluate_df() B

↳ Parent: custom.sanity.gas_stores

Complexity

Conditions

Size

Total Lines	96
Code Lines	64

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	64
dl	0
loc	96
rs	7.7115
c	0
b	0
f	0
cc	5
nop	3

How to fix Long Method

"""
Sanity check validation rules for gas storage components.

Validates CH4 and H2 storage capacities against expected values from
grid capacities and external data sources.
"""

from egon_validation.rules.base import DataFrameRule, RuleResult, Severity

from egon.data import config
from egon.data.datasets.hydrogen_etrago.storage import (
    calculate_and_map_saltcavern_storage_potential
)


class CH4StoresCapacity(DataFrameRule):
    """
    Validate CH4 store capacity in Germany.

    Compares the sum of CH4 store capacities in the database against the
    expected capacity calculated from:
    - CH4 grid capacity allocation
    - Total CH4 store capacity in Germany (source: GIE)

    The check allows for small deviations between observed and expected values.
    """

    # Total CH4 store capacity in Germany in MWh.
    # GIE capacity: https://www.gie.eu/transparency/databases/storage-database/
    _STORES_CAP_GERMANY_MWH = 266424202

    # CH4 grid capacity in MWh used for the eGon2035 scenario.
    _GRID_CAP_EGON2035_MWH = 130000

    # Base CH4 grid capacity in MWh for eGon100RE; it is reduced by the share
    # of CH4 pipelines retrofitted to H2.
    # NOTE(review): this is ten times smaller than the eGon2035 value -
    # confirm that the difference is intended and not a typo.
    _GRID_CAP_EGON100RE_MWH = 13000

    def __init__(self, table: str, rule_id: str, scenario: str = "eGon2035",
                 rtol: float = 0.02, **kwargs):
        """
        Parameters
        ----------
        table : str
            Target table (grid.egon_etrago_store)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        rtol : float
            Relative tolerance for capacity deviation (default: 0.02 = 2%)
        """
        super().__init__(rule_id=rule_id, table=table, scenario=scenario,
                         rtol=rtol, **kwargs)
        self.kind = "sanity"
        self.scenario = scenario

    def get_query(self, ctx):
        """
        Query to get total CH4 store capacity in Germany.

        Returns a query that sums all CH4 store capacities for German buses
        in the specified scenario.
        """
        return f"""
        SELECT SUM(e_nom::numeric) as e_nom_germany
        FROM grid.egon_etrago_store
        WHERE scn_name = '{self.scenario}'
        AND carrier = 'CH4'
        AND bus IN (
            SELECT bus_id
            FROM grid.egon_etrago_bus
            WHERE scn_name = '{self.scenario}'
            AND country = 'DE'
            AND carrier = 'CH4'
        )
        """

    def _capacity_result(self, success, message, severity, **extra):
        """
        Build a RuleResult carrying this rule's identifying fields.

        Parameters
        ----------
        success : bool
            Whether the check passed
        message : str
            Human-readable outcome description
        severity : Severity
            Severity level attached to the result
        **extra
            Optional additional RuleResult fields (e.g. observed, expected);
            they are only passed on when explicitly given

        Returns
        -------
        RuleResult
        """
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra,
        )

    def _grid_capacity(self):
        """
        Return the CH4 grid capacity (MWh) for the configured scenario.

        Returns
        -------
        float or int or None
            Grid capacity, or None if the scenario is not supported
        """
        if self.scenario == "eGon2035":
            return self._GRID_CAP_EGON2035_MWH
        if self.scenario == "eGon100RE":
            # Imported lazily, exactly where it is needed, so the scenario
            # parameter module is only loaded for this scenario.
            from egon.data.datasets.scenario_parameters import (
                get_sector_parameters,
            )
            retrofitted_share = get_sector_parameters("gas", "eGon100RE")[
                "retrofitted_CH4pipeline-to-H2pipeline_share"
            ]
            return self._GRID_CAP_EGON100RE_MWH * (1 - retrofitted_share)
        return None

    def evaluate_df(self, df, ctx):
        """
        Evaluate CH4 store capacity against expected values.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with e_nom_germany column
        ctx : dict
            Context information

        Returns
        -------
        RuleResult
            Validation result with success/failure status
        """
        # SUM() over no rows yields NULL, so an all-NaN column means "no data".
        if df.empty or df["e_nom_germany"].isna().all():
            return self._capacity_result(
                success=False,
                message=f"No CH4 store data found for scenario {self.scenario}",
                severity=Severity.WARNING,
            )

        observed_capacity = float(df["e_nom_germany"].values[0])

        # Calculate expected capacity based on scenario
        grid_cap = self._grid_capacity()
        if grid_cap is None:
            return self._capacity_result(
                success=False,
                message=f"Unknown scenario: {self.scenario}",
                severity=Severity.ERROR,
            )

        expected_capacity = self._STORES_CAP_GERMANY_MWH + grid_cap

        # Calculate relative deviation
        rtol = self.params.get("rtol", 0.02)
        deviation = abs(observed_capacity - expected_capacity) / expected_capacity
        deviation_pct = deviation * 100

        if deviation <= rtol:
            return self._capacity_result(
                success=True,
                message=(
                    f"CH4 stores capacity valid for {self.scenario}: "
                    f"deviation {deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
                ),
                severity=Severity.INFO,
                observed=observed_capacity,
                expected=expected_capacity,
            )

        return self._capacity_result(
            success=False,
            message=(
                f"CH4 stores capacity deviation too large for {self.scenario}: "
                f"{deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
            ),
            severity=Severity.ERROR,
            observed=observed_capacity,
            expected=expected_capacity,
        )


class H2SaltcavernStoresCapacity(DataFrameRule):
    """
    Validate H2 saltcavern store potential capacity in Germany.

    Compares the sum of H2 saltcavern potential storage capacities (e_nom_max)
    in the database against the expected capacity calculated from:
    - Area fractions around substations in federal states
    - Estimated total hydrogen storage potential per federal state (InSpEE-DS)

    The check allows for small deviations between observed and expected values.
    """

    def __init__(self, table: str, rule_id: str, scenario: str = "eGon2035",
                 rtol: float = 0.02, **kwargs):
        """
        Parameters
        ----------
        table : str
            Target table (grid.egon_etrago_store)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        rtol : float
            Relative tolerance for capacity deviation (default: 0.02 = 2%)
        """
        super().__init__(rule_id=rule_id, table=table, scenario=scenario,
                         rtol=rtol, **kwargs)
        self.kind = "sanity"
        self.scenario = scenario

    def get_query(self, ctx):
        """
        Query to get total H2 saltcavern potential storage capacity in Germany.

        Returns a query that sums all H2_underground store e_nom_max capacities
        for German H2_saltcavern buses in the specified scenario.
        """
        return f"""
        SELECT SUM(e_nom_max::numeric) as e_nom_max_germany
        FROM grid.egon_etrago_store
        WHERE scn_name = '{self.scenario}'
        AND carrier = 'H2_underground'
        AND bus IN (
            SELECT bus_id
            FROM grid.egon_etrago_bus
            WHERE scn_name = '{self.scenario}'
            AND country = 'DE'
            AND carrier = 'H2_saltcavern'
        )
        """

    def _capacity_result(self, success, message, severity, **extra):
        """
        Build a RuleResult carrying this rule's identifying fields.

        Parameters
        ----------
        success : bool
            Whether the check passed
        message : str
            Human-readable outcome description
        severity : Severity
            Severity level attached to the result
        **extra
            Optional additional RuleResult fields (e.g. observed, expected);
            they are only passed on when explicitly given

        Returns
        -------
        RuleResult
        """
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra,
        )

    @staticmethod
    def _expected_capacity():
        """
        Sum the expected saltcavern storage potential.

        Each row's potential is weighted by its area fraction and the
        products are summed. The frame returned by the helper is not
        modified.

        Returns
        -------
        float
            Total expected storage potential (0.0 if there are no rows)
        """
        storage_potentials = calculate_and_map_saltcavern_storage_potential()
        weighted = (
            storage_potentials["area_fraction"] * storage_potentials["potential"]
        )
        return float(sum(weighted.to_list()))

    def evaluate_df(self, df, ctx):
        """
        Evaluate H2 saltcavern storage capacity against expected values.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with e_nom_max_germany column
        ctx : dict
            Context information

        Returns
        -------
        RuleResult
            Validation result with success/failure status. Failures while
            computing the expected capacity, and a non-positive or NaN
            expected capacity, are reported as ERROR results rather than
            raised.
        """
        # SUM() over no rows yields NULL, so an all-NaN column means "no data".
        if df.empty or df["e_nom_max_germany"].isna().all():
            return self._capacity_result(
                success=False,
                message=f"No H2 saltcavern store data found for scenario {self.scenario}",
                severity=Severity.WARNING,
            )

        observed_capacity = float(df["e_nom_max_germany"].values[0])

        # Calculate expected capacity from saltcavern potential.
        # The broad except is deliberate: any failure of the external helper
        # is turned into a failed rule result so the validation run continues.
        try:
            expected_capacity = self._expected_capacity()
        except Exception as e:
            return self._capacity_result(
                success=False,
                message=f"Error calculating expected H2 saltcavern capacity: {str(e)}",
                severity=Severity.ERROR,
            )

        # A relative deviation is only meaningful for a positive reference:
        # zero would raise ZeroDivisionError, a negative value would yield a
        # negative deviation that always passes, and NaN compares as False.
        if not expected_capacity > 0:
            return self._capacity_result(
                success=False,
                message=(
                    f"Expected H2 saltcavern capacity for {self.scenario} is "
                    f"not positive ({expected_capacity}); "
                    f"cannot compute relative deviation"
                ),
                severity=Severity.ERROR,
                observed=observed_capacity,
                expected=expected_capacity,
            )

        # Calculate relative deviation
        rtol = self.params.get("rtol", 0.02)
        deviation = abs(observed_capacity - expected_capacity) / expected_capacity
        deviation_pct = deviation * 100

        if deviation <= rtol:
            return self._capacity_result(
                success=True,
                message=(
                    f"H2 saltcavern stores capacity valid for {self.scenario}: "
                    f"deviation {deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
                ),
                severity=Severity.INFO,
                observed=observed_capacity,
                expected=expected_capacity,
            )

        return self._capacity_result(
            success=False,
            message=(
                f"H2 saltcavern stores capacity deviation too large for {self.scenario}: "
                f"{deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
            ),
            severity=Severity.ERROR,
            observed=observed_capacity,
            expected=expected_capacity,
        )


1			"""
2			Sanity check validation rules for gas storage components.
3
4			Validates CH4 and H2 storage capacities against expected values from
5			grid capacities and external data sources.
6			"""
7
8			from egon_validation.rules.base import DataFrameRule, RuleResult, Severity
9
10			from egon.data import config
11			from egon.data.datasets.hydrogen_etrago.storage import (
12			calculate_and_map_saltcavern_storage_potential
13			)
14
15
16			class CH4StoresCapacity(DataFrameRule):
17			"""
18			Validate CH4 store capacity in Germany.
19
20			Compares the sum of CH4 store capacities in the database against the
21			expected capacity calculated from:
22			- CH4 grid capacity allocation
23			- Total CH4 store capacity in Germany (source: GIE)
24
25			The check allows for small deviations between observed and expected values.
26			"""
27
28			def __init__(self, table: str, rule_id: str, scenario: str = "eGon2035",
29			rtol: float = 0.02, **kwargs):
30			"""
31			Parameters
32			----------
33			table : str
34			Target table (grid.egon_etrago_store)
35			rule_id : str
36			Unique identifier for this validation rule
37			scenario : str
38			Scenario name ("eGon2035" or "eGon100RE")
39			rtol : float
40			Relative tolerance for capacity deviation (default: 0.02 = 2%)
41			"""
42			super().__init__(rule_id=rule_id, table=table, scenario=scenario,
43			rtol=rtol, **kwargs)
44			self.kind = "sanity"
45			self.scenario = scenario
46
47			def get_query(self, ctx):
48			"""
49			Query to get total CH4 store capacity in Germany.
50
51			Returns a query that sums all CH4 store capacities for German buses
52			in the specified scenario.
53			"""
54			return f"""
55			SELECT SUM(e_nom::numeric) as e_nom_germany
56			FROM grid.egon_etrago_store
57			WHERE scn_name = '{self.scenario}'
58			AND carrier = 'CH4'
59			AND bus IN (
60			SELECT bus_id
61			FROM grid.egon_etrago_bus
62			WHERE scn_name = '{self.scenario}'
63			AND country = 'DE'
64			AND carrier = 'CH4'
65			)
66			"""
67
68			def evaluate_df(self, df, ctx):
69			"""
70			Evaluate CH4 store capacity against expected values.
71
72			Parameters
73			----------
74			df : pd.DataFrame
75			DataFrame with e_nom_germany column
76			ctx : dict
77			Context information
78
79			Returns
80			-------
81			RuleResult
82			Validation result with success/failure status
83			"""
84			if df.empty or df["e_nom_germany"].isna().all():
85			return RuleResult(
86			rule_id=self.rule_id,
87			task=self.task,
88			table=self.table,
89			kind=self.kind,
90			success=False,
91			message=f"No CH4 store data found for scenario {self.scenario}",
92			severity=Severity.WARNING,
93			schema=self.schema,
94			table_name=self.table_name,
95			rule_class=self.__class__.__name__
96			)
97
98			observed_capacity = float(df["e_nom_germany"].values[0])
99
100			# Calculate expected capacity based on scenario
101			if self.scenario == "eGon2035":
102			grid_cap = 130000 # MWh
103			elif self.scenario == "eGon100RE":
104			# Get retrofitted share from config
105			from egon.data.datasets.scenario_parameters import get_sector_parameters
106			retrofitted_share = get_sector_parameters("gas", "eGon100RE")[
107			"retrofitted_CH4pipeline-to-H2pipeline_share"
108			]
109			grid_cap = 13000 * (1 - retrofitted_share) # MWh
110			else:
111			return RuleResult(
112			rule_id=self.rule_id,
113			task=self.task,
114			table=self.table,
115			kind=self.kind,
116			success=False,
117			message=f"Unknown scenario: {self.scenario}",
118			severity=Severity.ERROR,
119			schema=self.schema,
120			table_name=self.table_name,
121			rule_class=self.__class__.__name__
122			)
123
124			# GIE capacity: https://www.gie.eu/transparency/databases/storage-database/
125			stores_cap_germany = 266424202 # MWh
126
127			expected_capacity = stores_cap_germany + grid_cap
128
129			# Calculate relative deviation
130			rtol = self.params.get("rtol", 0.02)
131			deviation = abs(observed_capacity - expected_capacity) / expected_capacity
132
133			success = deviation <= rtol
134
135			deviation_pct = deviation * 100
136
137			if success:
138			return RuleResult(
139			rule_id=self.rule_id,
140			task=self.task,
141			table=self.table,
142			kind=self.kind,
143			success=True,
144			observed=observed_capacity,
145			expected=expected_capacity,
146			message=(
147			f"CH4 stores capacity valid for {self.scenario}: "
148			f"deviation {deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
149			),
150			severity=Severity.INFO,
151			schema=self.schema,
152			table_name=self.table_name,
153			rule_class=self.__class__.__name__
154			)
155			else:
156			return RuleResult(
157			rule_id=self.rule_id,
158			task=self.task,
159			table=self.table,
160			kind=self.kind,
161			success=False,
162			observed=observed_capacity,
163			expected=expected_capacity,
164			message=(
165			f"CH4 stores capacity deviation too large for {self.scenario}: "
166			f"{deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
167			),
168			severity=Severity.ERROR,
169			schema=self.schema,
170			table_name=self.table_name,
171			rule_class=self.__class__.__name__
172			)
173
174
175			class H2SaltcavernStoresCapacity(DataFrameRule):
176			"""
177			Validate H2 saltcavern store potential capacity in Germany.
178
179			Compares the sum of H2 saltcavern potential storage capacities (e_nom_max)
180			in the database against the expected capacity calculated from:
181			- Area fractions around substations in federal states
182			- Estimated total hydrogen storage potential per federal state (InSpEE-DS)
183
184			The check allows for small deviations between observed and expected values.
185			"""
186
187			def __init__(self, table: str, rule_id: str, scenario: str = "eGon2035",
188			rtol: float = 0.02, **kwargs):
189			"""
190			Parameters
191			----------
192			table : str
193			Target table (grid.egon_etrago_store)
194			rule_id : str
195			Unique identifier for this validation rule
196			scenario : str
197			Scenario name ("eGon2035" or "eGon100RE")
198			rtol : float
199			Relative tolerance for capacity deviation (default: 0.02 = 2%)
200			"""
201			super().__init__(rule_id=rule_id, table=table, scenario=scenario,
202			rtol=rtol, **kwargs)
203			self.kind = "sanity"
204			self.scenario = scenario
205
206			def get_query(self, ctx):
207			"""
208			Query to get total H2 saltcavern potential storage capacity in Germany.
209
210			Returns a query that sums all H2_underground store e_nom_max capacities
211			for German H2_saltcavern buses in the specified scenario.
212			"""
213			return f"""
214			SELECT SUM(e_nom_max::numeric) as e_nom_max_germany
215			FROM grid.egon_etrago_store
216			WHERE scn_name = '{self.scenario}'
217			AND carrier = 'H2_underground'
218			AND bus IN (
219			SELECT bus_id
220			FROM grid.egon_etrago_bus
221			WHERE scn_name = '{self.scenario}'
222			AND country = 'DE'
223			AND carrier = 'H2_saltcavern'
224			)
225			"""
226
227			def evaluate_df(self, df, ctx):
228			"""
229			Evaluate H2 saltcavern storage capacity against expected values.
230
231			Parameters
232			----------
233			df : pd.DataFrame
234			DataFrame with e_nom_max_germany column
235			ctx : dict
236			Context information
237
238			Returns
239			-------
240			RuleResult
241			Validation result with success/failure status
242			"""
243			if df.empty or df["e_nom_max_germany"].isna().all():
244			return RuleResult(
245			rule_id=self.rule_id,
246			task=self.task,
247			table=self.table,
248			kind=self.kind,
249			success=False,
250			message=f"No H2 saltcavern store data found for scenario {self.scenario}",
251			severity=Severity.WARNING,
252			schema=self.schema,
253			table_name=self.table_name,
254			rule_class=self.__class__.__name__
255			)
256
257			observed_capacity = float(df["e_nom_max_germany"].values[0])
258
259			# Calculate expected capacity from saltcavern potential
260			try:
261			storage_potentials = calculate_and_map_saltcavern_storage_potential()
262			storage_potentials["storage_potential"] = (
263			storage_potentials["area_fraction"] * storage_potentials["potential"]
264			)
265			expected_capacity = sum(storage_potentials["storage_potential"].to_list())
266			except Exception as e:
267			return RuleResult(
268			rule_id=self.rule_id,
269			task=self.task,
270			table=self.table,
271			kind=self.kind,
272			success=False,
273			message=f"Error calculating expected H2 saltcavern capacity: {str(e)}",
274			severity=Severity.ERROR,
275			schema=self.schema,
276			table_name=self.table_name,
277			rule_class=self.__class__.__name__
278			)
279
280			# Calculate relative deviation
281			rtol = self.params.get("rtol", 0.02)
282			deviation = abs(observed_capacity - expected_capacity) / expected_capacity
283
284			success = deviation <= rtol
285
286			deviation_pct = deviation * 100
287
288			if success:
289			return RuleResult(
290			rule_id=self.rule_id,
291			task=self.task,
292			table=self.table,
293			kind=self.kind,
294			success=True,
295			observed=observed_capacity,
296			expected=expected_capacity,
297			message=(
298			f"H2 saltcavern stores capacity valid for {self.scenario}: "
299			f"deviation {deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
300			),
301			severity=Severity.INFO,
302			schema=self.schema,
303			table_name=self.table_name,
304			rule_class=self.__class__.__name__
305			)
306			else:
307			return RuleResult(
308			rule_id=self.rule_id,
309			task=self.task,
310			table=self.table,
311			kind=self.kind,
312			success=False,
313			observed=observed_capacity,
314			expected=expected_capacity,
315			message=(
316			f"H2 saltcavern stores capacity deviation too large for {self.scenario}: "
317			f"{deviation_pct:.2f}% (tolerance: {rtol*100:.2f}%)"
318			),
319			severity=Severity.ERROR,
320			schema=self.schema,
321			table_name=self.table_name,
322			rule_class=self.__class__.__name__
323			)
324

openego / eGon-data

Pull Request — dev (#1375)

H2SaltcavernStoresCapacity.evaluate_df() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like