Passed
Pull Request — dev (#1375)
by
unknown
02:18
created

custom.sanity.heat_demand   A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 163
Duplicated Lines 7.36 %

Importance

Changes 0
Metric Value
wmc 7
eloc 69
dl 12
loc 163
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A HeatDemandValidation.get_query() 0 10 1
A HeatDemandValidation.__init__() 0 30 1
B HeatDemandValidation.evaluate_df() 12 78 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
"""
2
Sanity check validation rules for heat demand.
3
4
Validates that heat demand timeseries match expected values from peta_heat.
5
"""
6
7
from egon_validation.rules.base import DataFrameRule, RuleResult, Severity
8
9
10
class HeatDemandValidation(DataFrameRule):
    """
    Validate annual heat demand against peta_heat reference values.

    Compares the sum of rural_heat and central_heat load timeseries
    against the demand from egon_peta_heat table to ensure demand is
    correctly distributed.
    """

    def __init__(
        self,
        table: str,
        rule_id: str,
        scenario: str = "eGon2035",
        rtol: float = 0.02,
        **kwargs
    ):
        """
        Parameters
        ----------
        table : str
            Target table (grid.egon_etrago_load)
        rule_id : str
            Unique identifier for this validation rule
        scenario : str
            Scenario name ("eGon2035" or "eGon100RE")
        rtol : float
            Relative tolerance for deviation (default: 0.02 = 2%)
        **kwargs
            Forwarded unchanged to the base class constructor.
        """
        super().__init__(
            rule_id=rule_id,
            table=table,
            scenario=scenario,
            rtol=rtol,
            **kwargs
        )
        self.kind = "sanity"
        self.scenario = scenario
        self.rtol = rtol

    def get_query(self, ctx):
        """
        Query to compare heat demand output vs input.

        Returns a query that:
        1. Sums rural_heat + central_heat timeseries from etrago_load
        2. Sums demand from egon_peta_heat
        3. Returns both values for comparison

        Parameters
        ----------
        ctx
            Context object; not used when building the query.

        Returns
        -------
        str
            SQL text yielding the columns ``output_demand_twh`` and
            ``input_demand_twh``.
        """
        # NOTE(review): self.scenario is interpolated directly into the SQL
        # text. This is only safe while scenario names come from trusted
        # configuration -- confirm, or switch to bound parameters if the
        # query runner supports them.
        return f"""
        WITH output_demand AS (
            SELECT
                SUM((SELECT SUM(p) FROM UNNEST(b.p_set) p)) / 1000000 as demand_twh
            FROM grid.egon_etrago_load a
            JOIN grid.egon_etrago_load_timeseries b ON (a.load_id = b.load_id)
            JOIN grid.egon_etrago_bus c ON (a.bus = c.bus_id)
            WHERE b.scn_name = '{self.scenario}'
            AND a.scn_name = '{self.scenario}'
            AND c.scn_name = '{self.scenario}'
            AND c.country = 'DE'
            AND a.carrier IN ('rural_heat', 'central_heat')
        ),
        input_demand AS (
            SELECT
                SUM(demand / 1000000) as demand_twh
            FROM demand.egon_peta_heat
            WHERE scenario = '{self.scenario}'
        )
        SELECT
            o.demand_twh as output_demand_twh,
            i.demand_twh as input_demand_twh
        FROM output_demand o
        CROSS JOIN input_demand i
        """

    def _build_result(self, *, success, message, severity, **extra):
        """
        Build a RuleResult carrying this rule's identifying fields.

        Parameters
        ----------
        success : bool
            Whether the check passed.
        message : str
            Human-readable outcome description.
        severity : Severity
            Severity attached to the result.
        **extra
            Optional additional RuleResult fields (e.g. ``observed``,
            ``expected``); passed through only when supplied.

        Returns
        -------
        RuleResult
        """
        # task, schema and table_name are not set in this class; they are
        # presumably provided by the DataFrameRule base -- verify there.
        return RuleResult(
            rule_id=self.rule_id,
            task=self.task,
            table=self.table,
            kind=self.kind,
            success=success,
            message=message,
            severity=severity,
            schema=self.schema,
            table_name=self.table_name,
            rule_class=self.__class__.__name__,
            **extra
        )

    def evaluate_df(self, df, ctx):
        """
        Evaluate heat demand comparison.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with output_demand_twh and input_demand_twh columns
        ctx : dict
            Context information

        Returns
        -------
        RuleResult
            Validation result with success/failure status. A failure is
            returned (rather than an exception raised) when no data is
            available or when the reference demand is zero.
        """
        no_data = (
            df.empty
            or df["output_demand_twh"].isna().all()
            or df["input_demand_twh"].isna().all()
        )
        if no_data:
            return self._build_result(
                success=False,
                message=f"No heat demand data found for {self.scenario}",
                severity=Severity.ERROR,
            )

        output_twh = float(df["output_demand_twh"].values[0])
        input_twh = float(df["input_demand_twh"].values[0])

        # A zero reference makes the relative deviation undefined; report it
        # as a failed check instead of letting ZeroDivisionError escape.
        if input_twh == 0:
            return self._build_result(
                success=False,
                observed=output_twh,
                expected=input_twh,
                message=(
                    f"Reference heat demand is zero for {self.scenario}: "
                    f"cannot compute relative deviation "
                    f"(observed: {output_twh:.2f} TWh)"
                ),
                severity=Severity.ERROR,
            )

        # Calculate deviation. The magnitude of the reference is used so a
        # negative reference cannot produce a negative (always-passing) ratio.
        diff_twh = output_twh - input_twh
        deviation = abs(diff_twh) / abs(input_twh)
        deviation_pct = deviation * 100

        if deviation <= self.rtol:
            return self._build_result(
                success=True,
                observed=output_twh,
                expected=input_twh,
                message=(
                    f"Heat demand valid for {self.scenario}: "
                    f"{output_twh:.2f} TWh vs {input_twh:.2f} TWh expected "
                    f"(deviation: {deviation_pct:.2f}%, tolerance: {self.rtol*100:.2f}%)"
                ),
                severity=Severity.INFO,
            )

        return self._build_result(
            success=False,
            observed=output_twh,
            expected=input_twh,
            message=(
                f"Heat demand deviation too large for {self.scenario}: "
                f"{output_twh:.2f} TWh vs {input_twh:.2f} TWh expected "
                f"(diff: {diff_twh:+.2f} TWh, deviation: {deviation_pct:.2f}%, "
                f"tolerance: {self.rtol*100:.2f}%)"
            ),
            severity=Severity.ERROR,
        )
164