Passed
Pull Request — dev (#1375)
by
unknown
02:18
created

custom.sanity.cts_demand   A

Complexity

Total Complexity 8

Size/Duplication

Total Lines 170
Duplicated Lines 94.12 %

Importance

Changes 0
Metric Value
wmc 8
eloc 89
dl 160
loc 170
rs 10
c 0
b 0
f 0

6 Methods

Rating   Name   Duplication   Size   Complexity  
A CtsHeatDemandShare.__init__() 3 3 1
A CtsElectricityDemandShare.get_query() 2 2 1
A CtsElectricityDemandShare.__init__() 3 3 1
A CtsHeatDemandShare.get_query() 2 2 1
B CtsHeatDemandShare.evaluate_df() 45 45 2
B CtsElectricityDemandShare.evaluate_df() 45 45 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
"""CTS (Commercial, Trade, Services) demand sanity check validation rules."""
2
3
from egon_validation.rules.base import DataFrameRule, RuleResult, Severity
4
import numpy as np
5
6
7 View Code Duplication
def _evaluate_share_sums(rule, df, carrier):
    """Build the RuleResult for a "shares sum to 1" check.

    Shared implementation behind ``CtsElectricityDemandShare.evaluate_df``
    and ``CtsHeatDemandShare.evaluate_df``; the two checks differ only in
    the carrier name that appears in the result message.

    Args:
        rule: The rule instance being evaluated. Its ``params``, ``rule_id``,
            ``task``, ``table``, ``kind``, ``schema`` and ``table_name``
            attributes are read.
        df: Query result with a ``total_share`` column, one row per
            bus/scenario combination.
        carrier: Word inserted into the messages ("electricity" or "heat").

    Returns:
        RuleResult with ``success=True`` when every ``total_share`` lies
        within ``rtol`` of 1.0, otherwise ``success=False`` with
        ``severity=Severity.ERROR``. ``observed`` is the largest absolute
        deviation from 1.0 and ``expected`` is the tolerance.
    """
    rtol = rule.params.get("rtol", 0.005)

    # Coerce to float so object-dtype columns (e.g. Decimal or None coming
    # back from SQL) behave like ordinary numbers; None becomes NaN.
    shares = np.asarray(df["total_share"], dtype=float)
    deviations = np.abs(shares - 1.0)

    # One mask drives both the verdict and the violation count. atol=0.0
    # makes the test purely relative (|share - 1| <= rtol), which is what
    # np.testing.assert_allclose does by default; np.isclose would otherwise
    # add its own default atol of 1e-8. NaN and inf are never "close".
    within_tolerance = np.isclose(shares, 1.0, rtol=rtol, atol=0.0)
    n_violations = int((~within_tolerance).sum())

    if deviations.size == 0:
        # Nothing to compare: report a finite 0.0 instead of NaN.
        max_diff = 0.0
    else:
        # Ignore NaN rows when looking for the maximum; if every row is NaN
        # the maximum is genuinely unknown and stays NaN.
        known = deviations[~np.isnan(deviations)]
        max_diff = float(known.max()) if known.size else float("nan")

    common = dict(
        rule_id=rule.rule_id,
        task=rule.task,
        table=rule.table,
        kind=rule.kind,
        observed=max_diff,
        expected=rtol,
        schema=rule.schema,
        table_name=rule.table_name,
        rule_class=rule.__class__.__name__,
    )

    if n_violations == 0:
        return RuleResult(
            success=True,
            message=f"CTS {carrier} demand shares sum to 1 for all {len(df)} bus/scenario combinations (max deviation: {max_diff:.6f}, tolerance: {rtol:.6f})",
            **common,
        )

    return RuleResult(
        success=False,
        message=f"CTS {carrier} demand share mismatch: max deviation {max_diff:.6f} exceeds tolerance {rtol:.6f}. {n_violations} bus/scenario combinations have shares != 1.",
        severity=Severity.ERROR,
        **common,
    )


class CtsElectricityDemandShare(DataFrameRule):
    """Validate CTS electricity demand shares sum to 1 for each substation.

    Checks that the sum of aggregated CTS electricity demand share equals 1
    for every substation, as the substation profile is linearly disaggregated
    to all buildings.

    Note:
        The query always reads
        ``demand.egon_cts_electricity_demand_building_share``; the ``table``
        argument is only forwarded to the base class and echoed in the result.

    Args:
        table: Primary table being validated (demand.egon_cts_electricity_demand_building_share)
        rule_id: Unique identifier for this validation rule
        rtol: Relative tolerance for comparison (default: 0.005 = 0.5%)

    Example:
        >>> validation = {
        ...     "data_quality": [
        ...         CtsElectricityDemandShare(
        ...             table="demand.egon_cts_electricity_demand_building_share",
        ...             rule_id="SANITY_CTS_ELECTRICITY_DEMAND_SHARE",
        ...             rtol=0.005
        ...         )
        ...     ]
        ... }
    """

    def __init__(self, table: str, rule_id: str, rtol: float = 0.005, **kwargs):
        super().__init__(rule_id=rule_id, table=table, rtol=rtol, **kwargs)
        self.kind = "sanity"

    def get_query(self, ctx):
        """Return SQL that sums ``profile_share`` per bus_id and scenario."""
        return """
        SELECT bus_id, scenario, SUM(profile_share) as total_share
        FROM demand.egon_cts_electricity_demand_building_share
        GROUP BY bus_id, scenario
        """

    def evaluate_df(self, df, ctx):
        """Check that every ``total_share`` in ``df`` is within rtol of 1."""
        return _evaluate_share_sums(self, df, "electricity")


class CtsHeatDemandShare(DataFrameRule):
    """Validate CTS heat demand shares sum to 1 for each substation.

    Checks that the sum of aggregated CTS heat demand share equals 1
    for every substation, as the substation profile is linearly disaggregated
    to all buildings.

    Note:
        The query always reads ``demand.egon_cts_heat_demand_building_share``;
        the ``table`` argument is only forwarded to the base class and echoed
        in the result.

    Args:
        table: Primary table being validated (demand.egon_cts_heat_demand_building_share)
        rule_id: Unique identifier for this validation rule
        rtol: Relative tolerance for comparison (default: 0.005 = 0.5%)

    Example:
        >>> validation = {
        ...     "data_quality": [
        ...         CtsHeatDemandShare(
        ...             table="demand.egon_cts_heat_demand_building_share",
        ...             rule_id="SANITY_CTS_HEAT_DEMAND_SHARE",
        ...             rtol=0.005
        ...         )
        ...     ]
        ... }
    """

    def __init__(self, table: str, rule_id: str, rtol: float = 0.005, **kwargs):
        super().__init__(rule_id=rule_id, table=table, rtol=rtol, **kwargs)
        self.kind = "sanity"

    def get_query(self, ctx):
        """Return SQL that sums ``profile_share`` per bus_id and scenario."""
        return """
        SELECT bus_id, scenario, SUM(profile_share) as total_share
        FROM demand.egon_cts_heat_demand_building_share
        GROUP BY bus_id, scenario
        """

    def evaluate_df(self, df, ctx):
        """Check that every ``total_share`` in ``df`` is within rtol of 1."""
        return _evaluate_share_sums(self, df, "heat")
171