Passed
Push — dev ( 022020...8a5f7f )
by
unknown
01:47 queued 13s
created

data.datasets.heat_demand_timeseries.daily   A

Complexity

Total Complexity 14

Size/Duplication

Total Lines 383
Duplicated Lines 16.45 %

Importance

Changes 0
Metric Value
wmc 14
eloc 233
dl 63
loc 383
rs 10
c 0
b 0
f 0

6 Functions

Rating   Name   Duplication   Size   Complexity  
B temperature_classes() 63 63 1
A temp_interval() 0 23 2
A h_value() 0 43 1
A temperature_profile_extract() 0 44 1
A daily_demand_shares_per_climate_zone() 0 57 2
A map_climate_zones_to_zensus() 0 50 1

2 Methods

Rating   Name   Duplication   Size   Complexity  
A IdpProfiles.__init__() 0 4 1
B IdpProfiles.get_temperature_interval() 0 44 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
from datetime import datetime
2
import os
3
4
from sqlalchemy import Column, Float, Integer, Text
5
from sqlalchemy.ext.declarative import declarative_base
6
import geopandas as gpd
7
import numpy as np
8
import pandas as pd
9
10
from egon.data import db
11
import egon.data.datasets.era5 as era
12
13
14
from math import ceil
15
16
17
Base = declarative_base()
18
19
20
class EgonMapZensusClimateZones(Base):
    """ORM model of the table ``boundaries.egon_map_zensus_climate_zones``.

    Each row assigns one zensus population cell to one climate zone. The
    table is filled by ``map_climate_zones_to_zensus``.
    """

    __tablename__ = "egon_map_zensus_climate_zones"
    __table_args__ = dict(schema="boundaries")

    # Identifier of the zensus cell; primary key of the mapping.
    zensus_population_id = Column(Integer, primary_key=True)
    # Climate zone of the cell; map_climate_zones_to_zensus fills it from the
    # "Station" index of the climate zone shapefile.
    climate_zone = Column(Text)
26
27
28
class EgonDailyHeatDemandPerClimateZone(Base):
    """ORM model of ``demand.egon_daily_heat_demand_per_climate_zone``.

    One row per climate zone and day of the year, holding the temperature
    class of that day and the share of heat demand attributed to it.
    """

    __tablename__ = "egon_daily_heat_demand_per_climate_zone"
    __table_args__ = dict(schema="demand")

    # Composite primary key: climate zone plus day of the year.
    climate_zone = Column(Text, primary_key=True)
    day_of_year = Column(Integer, primary_key=True)
    temperature_class = Column(Integer)
    # NOTE(review): daily_demand_shares_per_climate_zone writes its dataframe
    # with if_exists="replace" and a column named "daily_demand_share", so
    # the stored table presumably does not carry this column name -- verify
    # before querying through this model.
    heat_demand_share = Column(Float(53))
36
37
38 View Code Duplication
def temperature_classes():
    """Return the lookup table from rounded temperature to temperature class.

    Returns
    -------
    dict
        Maps every integer temperature from -20 to 40 (inclusive, keys in
        ascending order) to a temperature class between 1 and 10:

        * -20 ... -15 -> class 1
        * each following band of five degrees (-14 ... -10, -9 ... -5,
          ..., 21 ... 25) -> classes 2 ... 9
        * 26 ... 40 -> class 10

    """
    lowest, highest = -20, 40

    def band(temperature):
        # ``-temperature // 5`` is ``-ceil(temperature / 5)`` in pure integer
        # arithmetic. The offset 4 makes the band -4 ... 0 class 4; min/max
        # fold everything below -15 into class 1 and above 25 into class 10.
        return min(10, max(1, 4 - (-temperature // 5)))

    return {
        temperature: band(temperature)
        for temperature in range(lowest, highest + 1)
    }
102
103
104
def map_climate_zones_to_zensus():
    """Assign a climate zone to every zensus cell by a geospatial join.

    The mapping table is dropped and recreated, the climate zone shapes are
    read from the data bundle below the current working directory, joined
    with the zensus cell points, and the result is written to the database.

    Returns
    -------
    None.

    """
    mapping_table = EgonMapZensusClimateZones.__table__

    # Start from a freshly created table
    engine = db.engine()
    mapping_table.drop(bind=engine, checkfirst=True)
    mapping_table.create(bind=engine, checkfirst=True)

    # Climate zone shapes, indexed by their station
    shapefile = os.path.join(
        os.getcwd(),
        "data_bundle_egon_data",
        "climate_zones_Germany",
        "TRY_Climate_Zone",
        "Climate_Zone.shp",
    )
    temperature_zones = gpd.read_file(shapefile).set_index("Station")

    # Point geometry of every zensus cell
    census_cells = db.select_geodataframe(
        """
        SELECT id as zensus_population_id, geom_point as geom
        FROM society.destatis_zensus_population_per_ha_inside_germany
        """,
        index_col="zensus_population_id",
        epsg=4326,
    )

    # The join stores the matched zone's index ("Station") as "index_right"
    matched = census_cells.sjoin(temperature_zones)
    zone_per_cell = matched.rename(columns={"index_right": "climate_zone"})[
        "climate_zone"
    ]

    # Overlapping climate zones match a cell more than once; keep the first
    repeated = zone_per_cell.index.duplicated(keep="first")
    zone_per_cell = zone_per_cell[~repeated]

    # Write the mapping to the SQL table
    zone_per_cell.to_sql(
        mapping_table.name,
        schema=mapping_table.schema,
        con=db.engine(),
        if_exists="replace",
    )
155
156
157
def daily_demand_shares_per_climate_zone():
    """Calculate the share of heat demand per day for each climate zone.

    The hourly factors from ``h_value`` are summed per day and divided by
    the total of their climate zone, so the shares of one zone add up to 1.
    Each day is stored together with the highest temperature class that
    ``temp_interval`` reports for it.

    Returns
    -------
    None.

    """
    table = EgonDailyHeatDemandPerClimateZone.__table__

    # Drop old table and create new one
    engine = db.engine()
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    # Calculate daily demand shares, normalized to sum() = 1 per climate zone
    h = h_value()
    daily_demand_shares = h.resample("D").sum() / h.sum()

    # Temperature class of each day and climate zone. The name deliberately
    # differs from the module-level function temperature_classes().
    daily_temperature_classes = temp_interval().resample("D").max()

    columns = [
        "climate_zone",
        "day_of_year",
        "temperature_class",
        "daily_demand_share",
    ]

    # One frame per climate zone, concatenated once. DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every iteration.
    frames = [
        pd.DataFrame(
            data={
                "climate_zone": climate_zone,
                "day_of_year": shares.index.day_of_year,
                "temperature_class": daily_temperature_classes[climate_zone][
                    shares.index
                ],
                "daily_demand_share": shares.values,
            }
        )
        for climate_zone, shares in daily_demand_shares.items()
    ]

    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        # Without any climate zone an empty table with the columns is written
        df = pd.DataFrame(columns=columns)

    # Insert dataframe to SQL table.
    # NOTE(review): if_exists="replace" replaces the table created above, so
    # the stored column is "daily_demand_share" although the ORM model
    # declares "heat_demand_share" -- confirm which name readers expect.
    df.to_sql(
        table.name,
        schema=table.schema,
        con=engine,
        if_exists="replace",
        index=False,
    )
215
216
217
class IdpProfiles:
    """Derive temperature classes from a temperature time series.

    Parameters
    ----------
    df_index : pandas.DatetimeIndex
        Index of the resulting dataframe. It has to be datetime-like
        because the temperatures are resampled to daily means.
    **kwargs
        ``temperature``: object with a ``values`` attribute (e.g. a
        pandas.Series) holding one temperature per entry of ``df_index``.

    """

    def __init__(self, df_index, **kwargs):
        self.df = pd.DataFrame(index=df_index)

        self.temperature = kwargs.get("temperature")

    def get_temperature_interval(
        self, how="geometric_series", intervals=None
    ):
        """Appoint the corresponding temperature interval to each temperature
        in the temperature vector.

        Parameters
        ----------
        how : str
            ``"mean"`` uses the daily mean temperature. ``"geometric_series"``
            (default) additionally weights the values 24, 48 and 72 rows
            earlier with 0.5, 0.25 and 0.125, i.e. the three preceding days
            when the index is hourly.
        intervals : dict, optional
            Lookup table from integer temperature to temperature class. It
            has to contain every integer between its smallest and largest
            key. Defaults to ``temperature_classes()``.

        Returns
        -------
        pandas.DataFrame
            ``self.df`` with the columns ``temperature``,
            ``temperature_geo`` and ``temperature_interval``.

        Raises
        ------
        ValueError
            If ``how`` is neither ``"geometric_series"`` nor ``"mean"``.

        """
        if how not in ("geometric_series", "mean"):
            raise ValueError(
                f"Unknown value for 'how': {how!r}; "
                "expected 'geometric_series' or 'mean'"
            )

        if intervals is None:
            intervals = temperature_classes()

        self.df["temperature"] = self.temperature.values

        # Daily mean, repeated for every row of the respective day
        daily_mean = (
            self.df["temperature"]
            .resample("D")
            .mean()
            .reindex(self.df.index)
            .ffill()
            .bfill()
        )

        if how == "geometric_series":
            # np.roll wraps around: the first rows take their "previous
            # days" from the end of the series.
            values = daily_mean.to_numpy()
            temperature_mean = (
                daily_mean
                + 0.5 * np.roll(values, 24)
                + 0.25 * np.roll(values, 48)
                + 0.125 * np.roll(values, 72)
            ) / 1.875
        else:
            temperature_mean = daily_mean

        self.df["temperature_geo"] = temperature_mean

        # Round up to whole degrees. Values beyond the lookup table are
        # assigned to its outermost classes instead of raising a KeyError.
        temperature_rounded = (
            np.ceil(self.df["temperature_geo"])
            .clip(lower=min(intervals), upper=max(intervals))
            .astype(int)
        )

        self.df["temperature_interval"] = [
            intervals[temperature]
            for temperature in temperature_rounded.tolist()
        ]

        return self.df
267
268
269
def temperature_profile_extract():
    """
    Description: Extract the temperature data of the climate zone stations
    from the weather cutout

    Returns
    -------
    temperature_profile : pandas.DataFrame
        Temperature profile of all TRY Climate Zones, one column per
        station (presumably for the weather year of the cutout -- confirm)

    """
    cutout = era.import_cutout(boundary="Germany")

    # Station coordinates shipped with the data bundle
    stations = pd.read_csv(
        os.path.join(
            os.getcwd(),
            "data_bundle_egon_data",
            "climate_zones_Germany",
            "TRY_Climate_Zone",
            "station_coordinates.csv",
        )
    )

    # NOTE(review): the points carry no CRS while the weather cells are
    # requested with epsg=4326 -- presumably the coordinates are WGS84.
    station_points = gpd.GeoDataFrame(
        stations,
        geometry=gpd.points_from_xy(stations.Longitude, stations.Latitude),
    )

    era5_cells = db.select_geodataframe(
        """
        SELECT geom FROM supply.egon_era5_weather_cells
        """,
        epsg=4326,
    )

    # Weather cells matched with a station point, labelled by that station
    cells_by_station = gpd.sjoin(era5_cells, station_points).set_index(
        "Station"
    )

    return cutout.temperature(
        shapes=cells_by_station.geom.values,
        index=cells_by_station.index,
    ).to_pandas()
313
314
315
def temp_interval():
    """
    Description: Create DataFrame with temperature data for TRY Climate Zones

    Returns
    -------
    temperature_interval : pandas.DataFrame
        Hourly temperature interval for the temperature profile of each
        TRY climate station, one column per station

    """
    # pd.offsets.Hour() is the hourly frequency formerly spelled "H"; that
    # string alias is deprecated in recent pandas versions.
    index = pd.date_range(
        datetime(2011, 1, 1, 0), periods=8760, freq=pd.offsets.Hour()
    )
    temp_profile = temperature_profile_extract()

    # Iterate over the station columns directly instead of by position
    return pd.DataFrame(
        {
            name_station: IdpProfiles(
                index, temperature=station_profile
            ).get_temperature_interval(how="geometric_series")[
                "temperature_interval"
            ]
            for name_station, station_profile in temp_profile.items()
        }
    )
338
339
340
def h_value():
    """
    Description: Assignment of daily demand scaling factor to each day of
    all TRY Climate Zones

    Returns
    -------
    h : pandas.DataFrame
        Hourly factor values for each station corresponding to the
        temperature profile. Extracted from demandlib.

    """
    # pd.offsets.Hour() is the hourly frequency formerly spelled "H"; that
    # string alias is deprecated in recent pandas versions.
    index = pd.date_range(
        datetime(2011, 1, 1, 0), periods=8760, freq=pd.offsets.Hour()
    )

    # Coefficients of the sigmoid evaluated below
    a = 3.0469695
    b = -37.1833141
    c = 5.6727847
    d = 0.1163157

    temp_profile = temperature_profile_extract()

    # Daily mean per station, repeated for every hour of the day
    temperature_profile_res = (
        temp_profile.resample("D").mean().reindex(index).ffill().bfill()
    )

    # Weight each day with its three preceding days. Rolling along axis 0
    # (time) replaces the former transpose / roll along axis 1 / transpose.
    # np.roll wraps around, so the first days use the last days of the index.
    hourly_values = temperature_profile_res.to_numpy()
    temp_profile_geom = (
        temperature_profile_res
        + 0.5 * np.roll(hourly_values, 24, axis=0)
        + 0.25 * np.roll(hourly_values, 48, axis=0)
        + 0.125 * np.roll(hourly_values, 72, axis=0)
    ) / 1.875

    h = a / (1 + (b / (temp_profile_geom - 40)) ** c) + d

    return h
383