Passed
Pull Request — dev (#905)
by
unknown
01:29
created

data.datasets.heat_demand_timeseries.daily   A

Complexity

Total Complexity 14

Size/Duplication

Total Lines 380
Duplicated Lines 16.58 %

Importance

Changes 0
Metric Value
wmc 14
eloc 232
dl 63
loc 380
rs 10
c 0
b 0
f 0

6 Functions

Rating   Name   Duplication   Size   Complexity  
B temperature_classes() 63 63 1
A temp_interval() 0 23 2
A h_value() 0 43 1
A temperature_profile_extract() 0 44 1
A daily_demand_shares_per_climate_zone() 0 57 2
A map_climate_zones_to_zensus() 0 47 1

2 Methods

Rating   Name   Duplication   Size   Complexity  
A IdpProfiles.__init__() 0 4 1
B IdpProfiles.get_temperature_interval() 0 44 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
from datetime import datetime
2
import os
3
4
from sqlalchemy import Column, Float, Integer, Text
5
from sqlalchemy.ext.declarative import declarative_base
6
import geopandas as gpd
7
import numpy as np
8
import pandas as pd
9
10
from egon.data import db
11
import egon.data.datasets.era5 as era
12
13
14
from math import ceil
15
16
17
# Declarative base class shared by the ORM table definitions in this module
Base = declarative_base()
18
19
20
class EgonMapZensusClimateZones(Base):
    """ORM definition of ``boundaries.egon_map_zensus_climate_zones``.

    Each row links one zensus population cell to a climate zone. The table
    is filled by :func:`map_climate_zones_to_zensus`.
    """

    __tablename__ = "egon_map_zensus_climate_zones"
    __table_args__ = dict(schema="boundaries")

    # Identifier of the zensus cell; primary key of the mapping
    zensus_population_id = Column(Integer, primary_key=True)
    # Label of the climate zone assigned to the cell
    climate_zone = Column(Text)
26
27
28
class EgonDailyHeatDemandPerClimateZone(Base):
    """ORM definition of ``demand.egon_daily_heat_demand_per_climate_zone``.

    Rows are keyed by climate zone and day of the year. The table is filled
    by :func:`daily_demand_shares_per_climate_zone`.
    """

    __tablename__ = "egon_daily_heat_demand_per_climate_zone"
    __table_args__ = dict(schema="demand")

    # Composite primary key: climate zone label + day of the year
    climate_zone = Column(Text, primary_key=True)
    day_of_year = Column(Integer, primary_key=True)
    # Temperature class of that day in that climate zone
    temperature_class = Column(Integer)
    # NOTE(review): daily_demand_shares_per_climate_zone() writes this value
    # under the column name "daily_demand_share" using if_exists="replace";
    # confirm which of the two names is the intended one.
    heat_demand_share = Column(Float(53))
36
37
38 View Code Duplication
def temperature_classes():
    """Return the lookup table from integer temperature to temperature class.

    The table covers every integer temperature from -20 to 40 (inclusive).
    Classes 1 to 9 each end at the upper bound listed below; class 10
    collects everything from 26 up to 40.

    Returns
    -------
    dict
        Maps each integer temperature in ``[-20, 40]`` to its class
        (``1`` .. ``10``), in ascending temperature order. A new dict is
        built on every call, so callers may modify the result freely.

    """
    # Highest temperature that still belongs to class 1, 2, ..., 10
    upper_bounds = (-15, -10, -5, 0, 5, 10, 15, 20, 25, 40)

    classes = {}
    lower = -20
    for temperature_class, upper in enumerate(upper_bounds, start=1):
        for temperature in range(lower, upper + 1):
            classes[temperature] = temperature_class
        # The next class starts right above the current upper bound
        lower = upper + 1

    return classes
102
103
104
def map_climate_zones_to_zensus():
    """Assign a climate zone to every zensus cell via a spatial join.

    The climate zone geometries are read from ``Climate_Zone.shp`` inside
    the data bundle (resolved relative to the current working directory)
    and joined with the point geometry of the zensus cells. The resulting
    mapping is written to the table described by
    :class:`EgonMapZensusClimateZones`.

    Returns
    -------
    None.

    """
    # Start from a freshly created, empty target table
    mapping_table = EgonMapZensusClimateZones.__table__
    engine = db.engine()
    mapping_table.drop(bind=engine, checkfirst=True)
    mapping_table.create(bind=engine, checkfirst=True)

    # Load the climate zones, indexed by their "Station" column
    shapefile = os.path.join(
        os.getcwd(),
        "data_bundle_egon_data",
        "climate_zones_Germany",
        "TRY_Climate_Zone",
        "Climate_Zone.shp",
    )
    temperature_zones = gpd.read_file(shapefile).set_index("Station")

    # Fetch the zensus cells together with their point geometry
    census_cells = db.select_geodataframe(
        """
        SELECT id as zensus_population_id, geom_point as geom
        FROM society.destatis_zensus_population_per_ha_inside_germany
        """,
        index_col="zensus_population_id",
        epsg=4326,
    )

    # Spatial join; the index of the matched zone arrives as "index_right"
    joined = census_cells.sjoin(temperature_zones)
    joined = joined.rename({"index_right": "climate_zone"}, axis="columns")
    zone_per_cell = joined.climate_zone

    # Write the mapping to the database
    zone_per_cell.to_sql(
        mapping_table.name,
        schema=mapping_table.schema,
        con=db.engine(),
        if_exists="replace",
    )
152
153
154
def daily_demand_shares_per_climate_zone():
    """Calculate the share of heat demand per day for each climate zone.

    The hourly values returned by :func:`h_value` are summed per day and
    divided by the total over the whole series, so the daily shares of
    each climate zone sum to 1. Every day is additionally tagged with the
    highest temperature class that :func:`temp_interval` reports for it.
    The result is written to the table described by
    :class:`EgonDailyHeatDemandPerClimateZone`.

    Returns
    -------
    None.

    """
    # Drop old table and create new one
    engine = db.engine()
    table = EgonDailyHeatDemandPerClimateZone.__table__
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    # Calculate daily demand shares, normalized to sum() = 1 per zone
    h = h_value()
    daily_demand_shares = h.resample("D").sum() / h.sum()

    # Extract temperature class for each day and climate zone.
    # Named so that it does not shadow the module-level function
    # temperature_classes().
    daily_temperature_classes = temp_interval().resample("D").max()

    columns = [
        "climate_zone",
        "day_of_year",
        "temperature_class",
        "daily_demand_share",
    ]

    # Build one frame per climate zone and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every iteration.
    frames = [
        pd.DataFrame(
            data={
                "climate_zone": climate_zone,
                "day_of_year": shares.index.day_of_year,
                "temperature_class": daily_temperature_classes[climate_zone][
                    shares.index
                ],
                "daily_demand_share": shares.values,
            }
        )
        for climate_zone, shares in daily_demand_shares.transpose().iterrows()
    ]
    # pd.concat raises on an empty list, so fall back to an empty frame
    df = pd.concat(frames) if frames else pd.DataFrame(columns=columns)

    # Insert dataframe to SQL table.
    # NOTE(review): if_exists="replace" makes pandas drop and recreate the
    # table from the dataframe, so the table created above is replaced and
    # the stored column is "daily_demand_share", not the ORM attribute
    # "heat_demand_share" -- confirm this is intended.
    df.to_sql(
        table.name,
        schema=table.schema,
        con=engine,
        if_exists="replace",
        index=False,
    )
212
213
214
class IdpProfiles:
    """Derive temperature intervals for a single temperature time series.

    Parameters
    ----------
    df_index : pandas.DatetimeIndex
        Index of the internal working dataframe ``self.df``. It has to be
        datetime-like because the temperatures are resampled per day.
    **kwargs
        ``temperature``: series of temperatures whose values are aligned
        positionally with ``df_index``.

    """

    def __init__(self, df_index, **kwargs):
        self.df = pd.DataFrame(index=df_index)

        self.temperature = kwargs.get("temperature")

    def get_temperature_interval(self, how="geometric_series"):
        """Appoint the corresponding temperature interval to each temperature
        in the temperature vector.

        Parameters
        ----------
        how : str
            ``"geometric_series"`` (default) weights the daily mean of the
            current position with the values 24, 48 and 72 positions
            earlier (factors 1, 0.5, 0.25, 0.125, wrapping around at the
            start of the series). ``"mean"`` uses the plain daily mean.

        Returns
        -------
        pandas.DataFrame
            ``self.df`` with the columns ``temperature``,
            ``temperature_geo`` and ``temperature_interval``.

        Raises
        ------
        ValueError
            If ``how`` is not one of the supported modes.
        KeyError
            If a rounded-up temperature is not covered by
            ``temperature_classes()``.

        """
        # Fail early with a clear message; an unknown mode previously ended
        # in an opaque TypeError from ceil(None) further down.
        if how not in ("geometric_series", "mean"):
            raise ValueError(
                f"Unknown value for 'how': {how!r}; "
                "expected 'geometric_series' or 'mean'"
            )

        self.df["temperature"] = self.temperature.values

        # Daily mean, spread back onto the original index
        temperature = (
            self.df["temperature"]
            .resample("D")
            .mean()
            .reindex(self.df.index)
            .ffill()
            .bfill()
        )

        if how == "geometric_series":
            # np.roll shifts by positions; with hourly data (as used by
            # temp_interval) 24 positions correspond to one day.
            temperature_mean = (
                temperature
                + 0.5 * np.roll(temperature, 24)
                + 0.25 * np.roll(temperature, 48)
                + 0.125 * np.roll(temperature, 72)
            ) / 1.875
        else:
            temperature_mean = temperature

        self.df["temperature_geo"] = temperature_mean

        # Round up to the next integer and look up its temperature class
        intervals = temperature_classes()
        self.df["temperature_interval"] = [
            intervals[ceil(value)] for value in self.df["temperature_geo"]
        ]

        return self.df
264
265
266
def temperature_profile_extract():
    """
    Description: Read the temperature time series of the climate stations

    The station coordinates are read from ``station_coordinates.csv`` in the
    data bundle (resolved relative to the current working directory) and
    matched to the ERA5 weather cells by a spatial join. The temperature of
    the matched cells is then taken from the weather cutout for Germany.

    Returns
    -------
    temperature_profile : pandas.DataFrame
        Temperature profile of the matched weather cells, labelled by the
        ``Station`` value of the station they were joined with

    """
    cutout = era.import_cutout(boundary="Germany")

    # Station coordinates shipped with the data bundle
    bundle_dir = os.path.join(
        os.getcwd(),
        "data_bundle_egon_data",
        "climate_zones_Germany",
        "TRY_Climate_Zone",
    )
    stations = pd.read_csv(os.path.join(bundle_dir, "station_coordinates.csv"))

    weather_cells = db.select_geodataframe(
        """
        SELECT geom FROM supply.egon_era5_weather_cells
        """,
        epsg=4326,
    )

    # Turn the longitude/latitude columns into point geometries
    station_points = gpd.GeoDataFrame(
        stations,
        geometry=gpd.points_from_xy(stations.Longitude, stations.Latitude),
    )

    # Keep only the weather cells matched to a station, keyed by station
    cells_by_station = gpd.sjoin(weather_cells, station_points).set_index(
        "Station"
    )

    return cutout.temperature(
        shapes=cells_by_station.geom.values,
        index=cells_by_station.index,
    ).to_pandas()
310
311
312
def temp_interval():
    """
    Description: Build the temperature intervals of all climate stations

    For every column of the extracted temperature profile the temperature
    interval is derived with ``IdpProfiles.get_temperature_interval`` using
    the ``"geometric_series"`` mode.

    Returns
    -------
    temperature_interval : pandas.DataFrame
        Hourly temperature interval for each station of the temperature
        profile, indexed by the 8760 hours starting 2011-01-01 00:00

    """
    hours = pd.date_range(datetime(2011, 1, 1, 0), periods=8760, freq="H")
    temperature_interval = pd.DataFrame()
    profiles = temperature_profile_extract()

    for station in profiles.columns:
        station_profile = IdpProfiles(hours, temperature=profiles[station])
        station_df = station_profile.get_temperature_interval(
            how="geometric_series"
        )
        temperature_interval[station] = station_df["temperature_interval"]

    return temperature_interval
335
336
337
def h_value():
    """
    Description: Assignment of daily demand scaling factor to each day of all TRY Climate Zones

    The temperature profile is averaged per day, spread back onto an hourly
    index and smoothed with the weights 1, 0.5, 0.25 and 0.125 applied to
    the values 0, 24, 48 and 72 hours earlier (wrapping around at the start
    of the year). The factor is then computed as
    ``a / (1 + (b / (T - 40)) ** c) + d``.

    Returns
    -------
    h : pandas.DataFrame
        Hourly factor values for each station corresponding to the temperature profile.
        Extracted from demandlib.

    """
    index = pd.date_range(datetime(2011, 1, 1, 0), periods=8760, freq="H")

    # Coefficients of the factor function below
    a = 3.0469695
    b = -37.1833141
    c = 5.6727847
    d = 0.1163157

    temp_profile = temperature_profile_extract()

    # Daily mean on the hourly index; .ffill()/.bfill() replace the
    # deprecated fillna(method=...) spelling with identical results.
    temperature_profile_res = (
        temp_profile.resample("D").mean().reindex(index).ffill().bfill()
    )

    # Rolling along axis 0 (time) shifts every station column at once, so
    # no transposition is needed.
    temp_profile_geom = (
        temperature_profile_res
        + 0.5 * np.roll(temperature_profile_res, 24, axis=0)
        + 0.25 * np.roll(temperature_profile_res, 48, axis=0)
        + 0.125 * np.roll(temperature_profile_res, 72, axis=0)
    ) / 1.875

    # NOTE(review): assumes temperatures stay below 40 so that the base of
    # the power is positive -- confirm the unit of the extracted profile.
    h = a / (1 + (b / (temp_profile_geom - 40)) ** c) + d

    return h
380