1
|
|
|
""" |
2
|
|
|
Household electricity demand time series for scenarios eGon2035 and eGon100RE at |
3
|
|
|
census cell level are set up. |
4
|
|
|
|
5
|
|
|
Electricity demand data for households in Germany in 1-hourly resolution for |
6
|
|
|
an entire year. Spatially, the data is resolved to 100 x 100 m cells and |
7
|
|
|
provides individual and distinct time series for each household in a cell. |
8
|
|
|
The cells are defined by the dataset Zensus 2011. |
9
|
|
|
|
10
|
|
|
""" |
11
|
|
|
|
12
|
|
|
from itertools import cycle, product |
13
|
|
|
from pathlib import Path |
14
|
|
|
import os |
15
|
|
|
import random |
16
|
|
|
|
17
|
|
|
from airflow.operators.python import PythonOperator |
18
|
|
|
from sqlalchemy import ARRAY, Column, Float, Integer, String |
19
|
|
|
from sqlalchemy.dialects.postgresql import CHAR, INTEGER, REAL |
20
|
|
|
from sqlalchemy.ext.declarative import declarative_base |
21
|
|
|
import numpy as np |
22
|
|
|
import pandas as pd |
23
|
|
|
|
24
|
|
|
from egon.data import db |
25
|
|
|
from egon.data.datasets import Dataset |
26
|
|
|
from egon.data.datasets.scenario_parameters import get_scenario_year |
27
|
|
|
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts |
28
|
|
|
import egon.data.config |
29
|
|
|
|
30
|
|
|
# Shared SQLAlchemy declarative base for the ORM table classes defined below.
Base = declarative_base()
# Shared database engine used for table creation and inserts in this module.
engine = db.engine()


# Get random seed from config
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
36
|
|
|
|
37
|
|
|
|
38
|
|
|
class IeeHouseholdLoadProfiles(Base):
    """
    Class definition of table demand.iee_household_load_profiles.
    """

    __tablename__ = "iee_household_load_profiles"
    __table_args__ = {"schema": "demand"}

    # Surrogate primary key
    id = Column(INTEGER, primary_key=True)
    # 8-character household profile type identifier (indexed for lookups)
    type = Column(CHAR(8), index=True)
    # Annual hourly load profile time series in Wh, stored as one array row
    # per profile (see write_hh_profiles_to_db)
    load_in_wh = Column(ARRAY(REAL))
49
|
|
|
|
50
|
|
|
|
51
|
|
|
class HouseholdElectricityProfilesInCensusCells(Base):
    """
    Class definition of table demand.egon_household_electricity_profile_in_census_cell.

    Lists references and scaling parameters of time series data for each
    household in a cell by identifiers. This table is fundamental for creating
    subsequent data like demand profiles on MV grid level or for determining
    the peak load at load area level.

    """

    __tablename__ = "egon_household_electricity_profile_in_census_cell"
    __table_args__ = {"schema": "demand"}

    # Zensus 100 x 100 m cell id (primary key)
    cell_id = Column(Integer, primary_key=True)
    # Zensus grid identifier of the cell
    grid_id = Column(String)
    # References (ids) of the household profiles assigned to this cell
    cell_profile_ids = Column(ARRAY(String, dimensions=1))
    # NUTS-3 and NUTS-1 regions the cell belongs to
    nuts3 = Column(String)
    nuts1 = Column(String)
    # Scaling factors to match annual demand per scenario year
    factor_2019 = Column(Float)
    factor_2023 = Column(Float)
    factor_2035 = Column(Float)
    factor_2050 = Column(Float)
74
|
|
|
|
75
|
|
|
|
76
|
|
|
class EgonDestatisZensusHouseholdPerHaRefined(Base):
    """
    Class definition of table society.egon_destatis_zensus_household_per_ha_refined.
    """

    __tablename__ = "egon_destatis_zensus_household_per_ha_refined"
    __table_args__ = {"schema": "society"}

    # Surrogate primary key
    id = Column(INTEGER, primary_key=True)
    # Zensus cell id and grid identifier (both indexed for joins)
    cell_id = Column(Integer, index=True)
    grid_id = Column(String, index=True)
    # NUTS-3 and NUTS-1 regions of the cell
    nuts3 = Column(String)
    nuts1 = Column(String)
    # Census characteristics code of the household-type category
    characteristics_code = Column(Integer)
    # Household counts in the coarse (5) categorization
    hh_5types = Column(Integer)
    # Two-letter refined household type code (e.g. as used by the IEE profiles)
    hh_type = Column(CHAR(2))
    # Household counts in the refined (10) categorization
    hh_10types = Column(Integer)
93
|
|
|
|
94
|
|
|
|
95
|
|
|
class EgonEtragoElectricityHouseholds(Base):
    """
    Class definition of table demand.egon_etrago_electricity_households.

    The table contains household electricity demand profiles aggregated at MV grid
    district level in MWh.
    """

    __tablename__ = "egon_etrago_electricity_households"
    __table_args__ = {"schema": "demand"}

    # MV grid district bus id; together with scn_name forms the primary key
    bus_id = Column(Integer, primary_key=True)
    # Scenario name (e.g. "eGon2035")
    scn_name = Column(String, primary_key=True)
    # Active and reactive power set-point time series
    p_set = Column(ARRAY(Float))
    q_set = Column(ARRAY(Float))
110
|
|
|
|
111
|
|
|
|
112
|
|
|
class HouseholdDemands(Dataset):
    """
    Household electricity demand time series for scenarios eGon2035 and eGon100RE at
    census cell level are set up.

    Electricity demand data for households in Germany in 1-hourly resolution for
    an entire year. Spatially, the data is resolved to 100 x 100 m cells and
    provides individual and distinct time series for each household in a cell.
    The cells are defined by the dataset Zensus 2011.

    *Dependencies*

    * :py:class:`DemandRegio <egon.data.datasets.demandregio.DemandRegio>`
    * :py:class:`Vg250 <egon.data.datasets.vg250.Vg250>`
    * :py:class:`OsmBuildingsStreets <egon.data.datasets.osm_buildings_streets.OsmBuildingsStreets>`
    * :py:func:`create_buildings_residential_zensus_mapping <egon.data.datasets.osm_buildings_streets.create_buildings_residential_zensus_mapping>`
    * :py:class:`ZensusMiscellaneous <egon.data.datasets.zensus.ZensusMiscellaneous>`
    * :py:class:`ZensusMvGridDistricts <egon.data.datasets.zensus_mv_grid_districts.ZensusMvGridDistricts>`
    * :py:class:`ZensusVg250 <egon.data.datasets.zensus_vg250.ZensusVg250>`

    *Resulting tables*

    * :py:class:`demand.iee_household_load_profiles
      <IeeHouseholdLoadProfiles>` is created and filled
    * :py:class:`demand.egon_household_electricity_profile_in_census_cell
      <HouseholdElectricityProfilesInCensusCells>` is created and filled
    * :py:class:`society.egon_destatis_zensus_household_per_ha_refined
      <EgonDestatisZensusHouseholdPerHaRefined>` is created and filled
    * :py:class:`demand.egon_etrago_electricity_households
      <EgonEtragoElectricityHouseholds>` is created and filled

    The following datasets are used for creating the data:

    * Electricity demand time series for household categories
      produced by demand profile generator (DPG) from Fraunhofer IEE
      (see :func:`get_iee_hh_demand_profiles_raw`)
    * Spatial information about people living in households by Zensus 2011 at
      federal state level

        * Type of household (family status)
        * Age
        * Number of people

    * Spatial information about number of households per ha, categorized by type
      of household (family status) with 5 categories (also from Zensus 2011)
    * Demand-Regio annual household demand at NUTS3 level

    *What is the goal?*

    To use the electricity demand time series from the `demand profile generator`
    to create spatially referenced household demand time series for Germany at a
    resolution of 100 x 100 m cells.

    *What is the challenge?*

    The electricity demand time series produced by demand profile generator offer
    12 different household profile categories.
    To use most of them, the spatial information about the number of households
    per cell (5 categories) needs to be enriched by supplementary data to match
    the household demand profile categories specifications. Hence, 10 out of 12
    different household profile categories can be distinguished by increasing
    the number of categories of cell-level household data.

    *How are these datasets combined?*

    * Spatial information about people living in households by zensus (2011) at
      federal state NUTS1 level `df_zensus` is aggregated to be compatible
      to IEE household profile specifications.

        * exclude kids and reduce to adults and seniors
        * group as defined in `HH_TYPES`
        * convert data from people living in households to number of households
          by `mapping_people_in_households`
        * calculate fraction of fine household types (10) within subgroup of rough
          household types (5) `df_dist_households`

    * Spatial information about number of households per ha
      `df_census_households_nuts3` is mapped to NUTS1 and NUTS3 level.
      Data is refined with household subgroups via
      `df_dist_households` to `df_census_households_grid_refined`.
    * Enriched 100 x 100 m household dataset is used to sample and aggregate
      household profiles. A table including individual profile id's for each cell
      and scaling factor to match Demand-Regio annual sum projections for 2035
      and 2050 at NUTS3 level is created in the database as
      `demand.household_electricity_profiles_in_census_cells`.

    *What are central assumptions during the data processing?*

    * Mapping zensus data to IEE household categories is not trivial. In
      conversion from persons in household to number of
      households, number of inhabitants for multi-person households is estimated
      as weighted average in `OO_factor`
    * The distribution to refine household types at cell level is the same for
      each federal state
    * Refining of household types leads to a float number of profiles drawn at
      cell level which needs to be rounded to the nearest int by np.rint().
    * 100 x 100 m cells are matched to NUTS via cells centroid location
    * Cells with households in unpopulated areas are removed

    *Drawbacks and limitations of the data*

    * The distribution to refine household types at cell level is the same for
      each federal state
    * Household profiles aggregated annual demand matches Demand Regio demand at
      NUTS-3 level, but it is not matching the demand regio time series profile
    * Due to secrecy, some census data are highly modified under certain attributes
      (quantity_q = 2). This cell data is not corrected, but excluded.
    * There is deviation in the Census data from table to table. The statistical
      methods are not stringent. Hence, there are cases in which data contradicts.
    * Census data with attribute 'HHTYP_FAM' is missing for some cells with small
      amount of households. This data is generated using the average share of
      household types for cells with similar household number. For some cells the
      summed amount of households per type deviates from the total number with
      attribute 'INSGESAMT'. As the profiles are scaled with demand-regio data at
      nuts3-level the impact at a higher aggregation level is negligible.
      For sake of simplicity, the data is not corrected.
    * There are cells without household data but a population. A randomly chosen
      household distribution is taken from a subgroup of cells with same population
      value and applied to all cells with missing household distribution and the
      specific population value.

    *Helper functions*

    * To access the DB, select specific profiles at various aggregation levels
      use :py:func:`get_hh_profiles_from_db`
    * To access the DB, select specific profiles at various aggregation levels
      and scale profiles use :py:func:`get_scaled_profiles_from_db`

    """

    #:
    name: str = "Household Demands"
    #:
    version: str = "0.0.12"

    def __init__(self, dependencies):
        """Assemble the task graph depending on the configured scenarios.

        Parameters
        ----------
        dependencies:
            Upstream dataset dependencies, passed through unchanged to
            :py:class:`Dataset <egon.data.datasets.Dataset>`.
        """
        tasks = (
            create_table,
            houseprofiles_in_census_cells,
        )

        scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]

        # One MV-grid aggregation task per configured scenario. Replaces
        # four copy-pasted conditional blocks (which also reused the
        # misleading variable name `mv_hh_electricity_load_2035` for the
        # 2019 and 2023 tasks). The tuple order preserves the original
        # task ordering: 2019, 2023, 2035, 2050.
        for scenario, year in (
            ("status2019", 2019),
            ("status2023", 2023),
            ("eGon2035", 2035),
            ("eGon100RE", 2050),
        ):
            if scenario in scenarios:
                mv_hh_electricity_load = PythonOperator(
                    task_id=f"MV-hh-electricity-load-{year}",
                    python_callable=mv_grid_district_HH_electricity_load,
                    op_args=[scenario, year],
                )
                tasks = tasks + (mv_hh_electricity_load,)

        super().__init__(
            name=self.name,
            version=self.version,
            dependencies=dependencies,
            tasks=tasks,
        )
304
|
|
|
|
305
|
|
|
|
306
|
|
|
def create_table():
    """Recreate demand.egon_etrago_electricity_households from scratch.

    Drops the table if it exists and creates it again, leaving it empty.
    """
    table = EgonEtragoElectricityHouseholds.__table__
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)
313
|
|
|
|
314
|
|
|
|
315
|
|
|
def clean(x):
    """Clean zensus household data row-wise

    Clean dataset by

    * converting '.' and '-' to str(0)
    * removing brackets

    Table can be converted to int/floats afterwards

    Parameters
    ----------
    x: pd.Series
        It is meant to be used with :code:`df.applymap()`

    Returns
    -------
    pd.Series
        Re-formatted data row
    """
    # Placeholders for missing/censored values become "0"; surrounding
    # brackets (used to mark uncertain values) are stripped.
    text = str(x).replace("-", "0").replace(".", "0")
    return text.strip("()")
339
|
|
|
|
340
|
|
|
|
341
|
|
|
def write_hh_profiles_to_db(hh_profiles):
    """Write HH demand profiles of IEE into db. One row per profile type.

    The annual load profile timeseries is an array.

    schema: demand
    tablename: iee_household_load_profiles

    Parameters
    ----------
    hh_profiles: pd.DataFrame
        Profile table with one column per profile type and time steps as
        index.

    Returns
    -------
    """
    # Reshape the wide profile table into one row per profile type, with
    # the whole annual series collected into a tuple per type.
    profiles_long = (
        hh_profiles.rename_axis("type", axis=1)
        .rename_axis("timestep", axis=0)
        .stack()
        .rename("load_in_wh")
        .to_frame()
        .reset_index()
        .groupby("type")["load_in_wh"]
        .apply(tuple)
        .reset_index()
    )

    # Recreate the target table before inserting.
    table = IeeHouseholdLoadProfiles.__table__
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine)

    profiles_long.to_sql(
        name=table.name,
        schema=table.schema,
        con=engine,
        if_exists="append",
        method="multi",
        chunksize=100,
        index=False,
        dtype={
            "load_in_wh": IeeHouseholdLoadProfiles.load_in_wh.type,
            "type": IeeHouseholdLoadProfiles.type.type,
            "id": IeeHouseholdLoadProfiles.id.type,
        },
    )
382
|
|
|
|
383
|
|
|
|
384
|
|
|
def get_iee_hh_demand_profiles_raw():
    """Gets and returns household electricity demand profiles from the
    egon-data-bundle.

    Household electricity demand profiles generated by Fraunhofer IEE.
    Methodology is described in
    `Erzeugung zeitlich hochaufgelöster Stromlastprofile für verschiedene Haushaltstypen
    <https://www.researchgate.net/publication/273775902_Erzeugung_zeitlich_hochaufgeloster_Stromlastprofile_fur_verschiedene_Haushaltstypen>`_.
    It is used and further described in the following theses by:

    * Jonas Haack:
      "Auswirkungen verschiedener Haushaltslastprofile auf PV-Batterie-Systeme"
      (confidential)
    * Simon Ruben Drauz
      "Synthesis of a heat and electrical load profile for single and
      multi-family houses used for subsequent performance tests of a
      multi-component energy system",
      http://dx.doi.org/10.13140/RG.2.2.13959.14248

    Notes
    -----
    The household electricity demand profiles have been generated for 2016
    which is a leap year (8784 hours) starting on a Friday. The weather year
    is 2011 and the heat timeseries 2011 are generated for 2011 too (cf.
    dataset :mod:`egon.data.datasets.heat_demand_timeseries.HTS`), having
    8760h and starting on a Saturday. To align the profiles, the first day of
    the IEE profiles are deleted, resulting in 8760h starting on Saturday.

    Returns
    -------
    pd.DataFrame
        Table with profiles in columns and time as index. A pd.MultiIndex is
        used to distinguish load profiles from different EUROSTAT household
        types.
    """
    pa_config = egon.data.config.datasets()["hh_demand_profiles"]

    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]

    # Select the bundle file matching the configured dataset boundary.
    if dataset == "Everything":
        file_section = "path"
    elif dataset == "Schleswig-Holstein":
        file_section = "path_testmode"
    else:
        raise ValueError(f"'{dataset}' is not a valid dataset boundary.")

    file_path = pa_config["sources"]["household_electricity_demand_profiles"][
        file_section
    ]

    download_directory = os.path.join(
        "data_bundle_egon_data", "household_electricity_demand_profiles"
    )

    hh_profiles_file = (
        Path(".") / Path(download_directory) / Path(file_path).name
    )

    df_hh_profiles = pd.read_hdf(hh_profiles_file)

    # aggregate profile types O2, O1 and O0 as there is no differentiation
    # possible at cell level see :func:`regroup_nuts1_census_data`.
    # Collection order (O1, O2, O0) matters: it fixes the enumeration used
    # for the new O0a profile ids.
    merge_profiles = []
    for prefix in ("O1", "O2", "O0"):
        merge_profiles.extend(
            col for col in df_hh_profiles.columns if prefix in col[:3]
        )
    mapping = {old: f"O0a{i:05d}" for i, old in enumerate(merge_profiles)}
    df_hh_profiles = df_hh_profiles.rename(columns=mapping)

    return df_hh_profiles
460
|
|
|
|
461
|
|
|
|
462
|
|
|
def set_multiindex_to_profiles(hh_profiles):
    """The profile id is split into type and number and set as multiindex.

    Parameters
    ----------
    hh_profiles: pd.DataFrame
        Profiles
    Returns
    -------
    hh_profiles: pd.DataFrame
        Profiles with Multiindex
    """
    # A profile id like "XXa00042" becomes the tuple ("XX", 42): the first
    # two characters are the household type, everything after the third
    # character is the running profile number.
    split_ids = [(col[:2], int(col[3:])) for col in hh_profiles.columns]
    hh_profiles.columns = pd.MultiIndex.from_tuples(split_ids)

    return hh_profiles
486
|
|
|
|
487
|
|
|
|
488
|
|
|
def get_census_households_nuts1_raw():
    """Get zensus age x household type data from egon-data-bundle

    Dataset about household size with information about the categories:

    * family type
    * age class
    * household size

    for Germany in spatial resolution of federal states NUTS-1.

    Data manually selected and retrieved from:
    https://ergebnisse2011.zensus2022.de/datenbank/online
    For reproducing data selection, please do:

    * Search for: "1000A-3016"
    * or choose topic: "Bevölkerung kompakt"
    * Choose table code: "1000A-3016" with title "Personen: Alter
      (11 Altersklassen) - Größe des privaten Haushalts - Typ des privaten
      Haushalts (nach Familien/Lebensform)"
    * Change setting "GEOLK1" to "Bundesländer (16)"

    Data would be available in higher resolution
    ("Landkreise und kreisfreie Städte (412)"), but only after registration.

    The downloaded file is called 'Zensus2011_Personen.csv'.

    Returns
    -------
    pd.DataFrame
        Pre-processed zensus household data
    """
    pa_config = egon.data.config.datasets()["hh_demand_profiles"]
    file_path = pa_config["sources"]["zensus_household_types"]["path"]

    download_directory = os.path.join(
        "data_bundle_egon_data", "zensus_households"
    )
    households_file = (
        Path(".") / Path(download_directory) / Path(file_path).name
    )

    # The bundle CSV carries a 5-line preamble and a 7-line footer; both
    # index and header span two levels. The python engine is required for
    # skipfooter support.
    read_options = dict(
        sep=";",
        decimal=".",
        skiprows=5,
        skipfooter=7,
        index_col=[0, 1],
        header=[0, 1],
        encoding="latin1",
        engine="python",
    )
    return pd.read_csv(households_file, **read_options)
546
|
|
|
|
547
|
|
|
|
548
|
|
|
def create_missing_zensus_data(
    df_households_typ, df_missing_data, missing_cells
):
    """
    Generate missing data as average share of the household types for cell groups with
    the same amount of households.

    There is missing data for specific attributes in the zensus dataset because
    of secrecy reasons. Some cells with only small amount of households are
    missing with attribute HHTYP_FAM. However the total amount of households
    is known with attribute INSGESAMT. The missing data is generated as average
    share of the household types for cell groups with the same amount of
    households.

    Parameters
    ----------
    df_households_typ: pd.DataFrame
        Zensus households data
    df_missing_data: pd.DataFrame
        number of missing cells of group of amount of households
    missing_cells: dict
        dictionary with list of grids of the missing cells grouped by amount of
        households in cell

    Returns
    ----------
    df_average_split: pd.DataFrame
        generated dataset of missing cells

    """
    # NOTE(review): despite being documented as a dict, ``missing_cells`` is
    # used as a DataFrame below (``.groupby("quantity")``) — confirm with
    # callers.
    # grid_ids of missing cells grouped by amount of households
    missing_grid_ids = {
        group: list(df.grid_id)
        for group, df in missing_cells.groupby("quantity")
    }

    # Grid ids for cells with low household numbers
    df_households_typ = df_households_typ.set_index("grid_id", drop=True)
    hh_in_cells = df_households_typ.groupby("grid_id")["quantity"].sum()
    # Map each relevant total household count to the grid ids of known
    # cells having exactly that count; these serve as the reference group.
    hh_index = {
        i: hh_in_cells.loc[hh_in_cells == i].index
        for i in df_missing_data.households.values
    }

    df_average_split = pd.DataFrame()
    for hh_size, index in hh_index.items():
        # average split of household types in cells with low household numbers
        split = (
            df_households_typ.loc[index].groupby("characteristics_code").sum()
            / df_households_typ.loc[index].quantity.sum()
        )
        split = split.quantity * hh_size

        # correct rounding: distribute the residual between the exact sum
        # and the rounded sum onto a randomly sampled row so the total
        # household count per cell is preserved
        difference = int(split.sum() - split.round().sum())
        if difference > 0:
            # add to any row
            split = split.round()
            random_row = split.sample()
            split[random_row.index] = random_row + difference
        elif difference < 0:
            # subtract only from rows > 0
            split = split.round()
            random_row = split[split > 0].sample()
            split[random_row.index] = random_row + difference
        else:
            split = split.round()

        # Dataframe with average split for each cell: pair every
        # (count, characteristics_code 1..5) with every missing grid id of
        # this household-count group
        temp = pd.DataFrame(
            product(zip(split, range(1, 6)), missing_grid_ids[hh_size]),
            columns=["tuple", "grid_id"],
        )
        temp = pd.DataFrame(temp.tuple.tolist()).join(temp.grid_id)
        temp = temp.rename(columns={0: "hh_5types", 1: "characteristics_code"})
        temp = temp.dropna()
        # drop household types that do not occur in this group
        temp = temp[(temp["hh_5types"] != 0)]
        # append for each cell group of households
        df_average_split = pd.concat(
            [df_average_split, temp], ignore_index=True
        )
    df_average_split["hh_5types"] = df_average_split["hh_5types"].astype(int)

    return df_average_split
632
|
|
|
|
633
|
|
|
|
634
|
|
|
def process_nuts1_census_data(df_census_households_raw):
    """Make data compatible with household demand profile categories

    Removes and reorders categories which are not needed to fit data to
    household types of IEE electricity demand time series generated by
    demand-profile-generator (DPG).

    * Kids (<15) are excluded as they are also excluded in DPG origin dataset
    * Adults (15<65)
    * Seniors (<65)

    Parameters
    ----------
    df_census_households_raw: pd.DataFrame
        cleaned zensus household type x age category data

    Returns
    -------
    pd.DataFrame
        Aggregated zensus household data on NUTS-1 level
    """
    # Clean data to int only
    df_census_households = df_census_households_raw.applymap(clean).applymap(
        int
    )

    # Group data to fit Load Profile Generator categories
    # define kids/adults/seniors
    kids = ["Unter 3", "3 - 5", "6 - 14"]  # < 15
    adults = [
        "15 - 17",
        "18 - 24",
        "25 - 29",
        "30 - 39",
        "40 - 49",
        "50 - 64",
    ]  # 15 < x <65
    seniors = ["65 - 74", "75 und älter"]  # >65

    def _sum_age_group(age_labels):
        # Sum the age-class columns of one group per household type
        # (level 0 of the column MultiIndex).
        return (
            df_census_households.loc[:, (slice(None), age_labels)]
            .groupby(level=0, axis=1)
            .sum()
        )

    # sum groups of kids, adults and seniors and concat
    df_census_households = pd.concat(
        [_sum_age_group(kids), _sum_age_group(adults), _sum_age_group(seniors)],
        axis=1,
        keys=["Kids", "Adults", "Seniors"],
        names=["age", "persons"],
    )

    # reduce column names to state only
    mapping_state = {
        label: label.split()[1]
        for label in df_census_households.index.get_level_values(level=0)
    }

    # rename index, rename axis, unstack and reorder levels
    df_census_households = (
        df_census_households.rename(index=mapping_state, level=0)
        .rename_axis(["state", "type"])
        .unstack()
        .reorder_levels(order=["type", "persons", "age"], axis=1)
    )

    return df_census_households
717
|
|
|
|
718
|
|
|
|
719
|
|
|
def regroup_nuts1_census_data(df_census_households_nuts1):
    """Regroup census data and map according to demand-profile types.

    For more information look at the respective publication:
    https://www.researchgate.net/publication/273775902_Erzeugung_zeitlich_hochaufgeloster_Stromlastprofile_fur_verschiedene_Haushaltstypen

    Parameters
    ----------
    df_census_households_nuts1: pd.DataFrame
        census household data on NUTS-1 level in absolute values

    Returns
    ----------
    df_dist_households: pd.DataFrame
        Distribution of households type
    """

    # Mapping of census household family types to Eurostat household types
    # (the types used by the demand-profile generator @iee-fraunhofer).
    # Only adults living in a household are counted here; kids are NOT
    # included, even if the household type name mentions them.
    # **! The Eurostat data only counts adults/seniors, excluding kids <15**
    hh_types_eurostat = {
        "SR": [
            ("Einpersonenhaushalte (Singlehaushalte)", "Insgesamt", "Seniors"),
            ("Alleinerziehende Elternteile", "Insgesamt", "Seniors"),
        ],
        # Single Seniors Single Parents Seniors
        "SO": [
            ("Einpersonenhaushalte (Singlehaushalte)", "Insgesamt", "Adults")
        ],  # Single Adults
        "SK": [("Alleinerziehende Elternteile", "Insgesamt", "Adults")],
        # Single Parents Adult
        "PR": [
            ("Paare ohne Kind(er)", "2 Personen", "Seniors"),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "2 Personen",
                "Seniors",
            ),
        ],
        # Couples without Kids Senior & same sex couples & shared flat seniors
        "PO": [
            ("Paare ohne Kind(er)", "2 Personen", "Adults"),
            ("Mehrpersonenhaushalte ohne Kernfamilie", "2 Personen", "Adults"),
        ],
        # Couples without Kids adults & same sex couples & shared flat adults
        "P1": [("Paare mit Kind(ern)", "3 Personen", "Adults")],
        "P2": [("Paare mit Kind(ern)", "4 Personen", "Adults")],
        "P3": [
            ("Paare mit Kind(ern)", "5 Personen", "Adults"),
            ("Paare mit Kind(ern)", "6 und mehr Personen", "Adults"),
        ],
        "OR": [
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "3 Personen",
                "Seniors",
            ),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "4 Personen",
                "Seniors",
            ),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "5 Personen",
                "Seniors",
            ),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "6 und mehr Personen",
                "Seniors",
            ),
            ("Paare mit Kind(ern)", "3 Personen", "Seniors"),
            ("Paare ohne Kind(er)", "3 Personen", "Seniors"),
            ("Paare mit Kind(ern)", "4 Personen", "Seniors"),
            ("Paare ohne Kind(er)", "4 Personen", "Seniors"),
            ("Paare mit Kind(ern)", "5 Personen", "Seniors"),
            ("Paare ohne Kind(er)", "5 Personen", "Seniors"),
            ("Paare mit Kind(ern)", "6 und mehr Personen", "Seniors"),
            ("Paare ohne Kind(er)", "6 und mehr Personen", "Seniors"),
        ],
        # no info about share of kids
        # OO, O1, O2 have the same amount, as no information about the share of
        # kids within census data set.
        "OO": [
            ("Mehrpersonenhaushalte ohne Kernfamilie", "3 Personen", "Adults"),
            ("Mehrpersonenhaushalte ohne Kernfamilie", "4 Personen", "Adults"),
            ("Mehrpersonenhaushalte ohne Kernfamilie", "5 Personen", "Adults"),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "6 und mehr Personen",
                "Adults",
            ),
            ("Paare ohne Kind(er)", "3 Personen", "Adults"),
            ("Paare ohne Kind(er)", "4 Personen", "Adults"),
            ("Paare ohne Kind(er)", "5 Personen", "Adults"),
            ("Paare ohne Kind(er)", "6 und mehr Personen", "Adults"),
        ],
        # no info about share of kids
    }

    # Sum the census member categories that belong to each profile type,
    # once per federal state (one row per index entry).
    state_rows = []
    for state in df_census_households_nuts1.index:
        state_rows.append(
            {
                hh_type: df_census_households_nuts1.loc[state, members].sum()
                for hh_type, members in hh_types_eurostat.items()
            }
        )
    df_hh_distribution_abs = pd.DataFrame(
        state_rows, index=df_census_households_nuts1.index
    )

    # Drop household types that are zero everywhere, then transpose so the
    # household types become the index and the states the columns.
    nonzero_columns = (df_hh_distribution_abs != 0).any(axis=0)
    df_hh_distribution_abs = df_hh_distribution_abs.loc[:, nonzero_columns].T

    return df_hh_distribution_abs
841
|
|
|
|
842
|
|
|
|
843
|
|
|
def inhabitants_to_households(df_hh_people_distribution_abs):
    """
    Convert number of inhabitants to number of household types.

    Takes the distribution of people living in types of households to
    calculate a distribution of household types by using a
    people-in-household mapping. Results are not rounded to int as they are
    only used to calculate a relative distribution anyways.
    The data of category 'HHGROESS_KLASS' in census households
    at grid level is used to determine an average wherever the amount
    of people is not trivial (OR, OO). Kids are not counted.

    Parameters
    ----------
    df_hh_people_distribution_abs: pd.DataFrame
        Grouped census household data on NUTS-1 level in absolute values

    Returns
    ----------
    df_dist_households: pd.DataFrame
        Distribution of households type
    """

    # Household-size distribution summed over all census cells. It is only
    # used to estimate the average size of multi-person households (OR, OO),
    # so the unambiguous 1- and 2-person classes are dropped below.
    # Only cell data of acceptable quality (quantity_q < 2) is used.
    df_hh_size = db.select_dataframe(
        sql="""
            SELECT characteristics_text, SUM(quantity) as summe
            FROM society.egon_destatis_zensus_household_per_ha as egon_d
            WHERE attribute = 'HHGROESS_KLASS' AND quantity_q < 2
            GROUP BY characteristics_text """,
        index_col="characteristics_text",
    )
    df_hh_size = df_hh_size.drop(index=["1 Person", "2 Personen"])

    # Define/estimate number of persons (w/o kids) for each household
    # category. For categories S* and P* it's fixed; for multi-person
    # households (OO, OR) it is the size-weighted average of the remaining
    # 3-/4-/5-/6+-person classes.
    OO_factor = (
        sum(df_hh_size["summe"] * [3, 4, 5, 6]) / df_hh_size["summe"].sum()
    )
    mapping_people_in_households = {
        "SR": 1,
        "SO": 1,
        "SK": 1,  # kids are excluded
        "PR": 2,
        "PO": 2,
        "P1": 2,  # kids are excluded
        "P2": 2,  # ""
        "P3": 2,  # ""
        "OR": OO_factor,
        "OO": OO_factor,
    }

    # Categories occurring only in the mapping or only in the data cannot be
    # converted; remove them from the mapping. `diff` is the symmetric
    # difference, so it may contain keys from either side.
    diff = set(df_hh_people_distribution_abs.index) ^ set(
        mapping_people_in_households.keys()
    )

    if diff:
        for key in diff:
            # pop(..., None) instead of `del`: a key that exists only in the
            # data index (not in the mapping) would raise KeyError with del.
            mapping_people_in_households.pop(key, None)
        print(f"Removed {diff} from mapping!")

    # divide amount of people by number of people per household type
    df_dist_households = df_hh_people_distribution_abs.div(
        mapping_people_in_households, axis=0
    )

    return df_dist_households
916
|
|
|
|
917
|
|
|
|
918
|
|
|
def impute_missing_hh_in_populated_cells(df_census_households_grid):
    """
    Fills in missing household data in populated cells based on a random
    selection from a subgroup of cells with the same population value.

    There are cells without household data but with population. A randomly
    chosen household distribution is taken from a subgroup of cells with the
    same population value and applied to all cells with missing household
    distribution and that population value. If there is no subgroup with
    household data for a population value, the fallback is the subgroup with
    the last smaller population value (or, if none is smaller, the smallest
    population value for which household data exists at all).

    Parameters
    ----------
    df_census_households_grid: pd.DataFrame
        census household data at 100x100m grid level

    Returns
    -------
    pd.DataFrame
        substituted census household data at 100x100m grid level"""

    df_w_hh = df_census_households_grid.dropna().reset_index(drop=True)
    df_wo_hh = df_census_households_grid.loc[
        df_census_households_grid.isna().any(axis=1)
    ].reset_index(drop=True)

    # Population values with household data; a set gives O(1) membership
    # tests inside the loop instead of scanning unique() each iteration.
    populations_with_hh = set(df_w_hh["population"].unique())

    # Initialise the fallback so the first iteration cannot hit an unbound
    # name when the smallest missing population value has no populated
    # counterpart (previously a latent NameError).
    fallback_value = df_w_hh["population"].min()

    # iterate over unique population values, ascending, so the fallback is
    # always the last smaller population value with household data
    for population in df_wo_hh["population"].sort_values().unique():

        if population in populations_with_hh:
            fallback_value = population
            population_value = population
        # use fallback of last possible household distribution
        else:
            population_value = fallback_value

        # get cells with specific population value from cells with
        # household distribution
        df_w_hh_population_i = df_w_hh.loc[
            df_w_hh["population"] == population_value
        ]
        # choose random cell within this group
        rnd_cell_id_population_i = np.random.choice(
            df_w_hh_population_i["cell_id"].unique()
        )
        # get household distribution of this cell
        df_rand_hh_distribution = df_w_hh_population_i.loc[
            df_w_hh_population_i["cell_id"] == rnd_cell_id_population_i
        ]
        # get cells with specific population value from cells without
        # household distribution
        df_wo_hh_population_i = df_wo_hh.loc[
            df_wo_hh["population"] == population
        ]

        # all cells will get the same random household distribution

        # prepare size of dataframe by number of household types
        df_repeated = pd.concat(
            [df_wo_hh_population_i] * df_rand_hh_distribution.shape[0],
            ignore_index=True,
        )
        df_repeated = df_repeated.sort_values("cell_id").reset_index(drop=True)

        # insert random household distribution
        columns = ["characteristics_code", "hh_5types"]
        df_repeated.loc[:, columns] = pd.concat(
            [df_rand_hh_distribution.loc[:, columns]]
            * df_wo_hh_population_i.shape[0]
        ).values
        # append new cells
        df_w_hh = pd.concat([df_w_hh, df_repeated], ignore_index=True)

    return df_w_hh
994
|
|
|
|
995
|
|
|
|
996
|
|
|
def get_census_households_grid():
    """
    Retrieves and adjusts census household data at 100x100m grid level, accounting for
    missing or divergent data.

    Query census household data at 100x100m grid level from database. As
    there is a divergence in the census household data depending which
    attribute is used. There also exist cells without household but with
    population data. The missing data in these cases are substituted. First
    census household data with attribute 'HHTYP_FAM' is missing for some
    cells with small amount of households. This data is generated using the
    average share of household types for cells with similar household number.
    For some cells the summed amount of households per type deviates from the
    total number with attribute 'INSGESAMT'. As the profiles are scaled with
    demand-regio data at nuts3-level the impact at a higher aggregation level
    is negligible. For sake of simplicity, the data is not corrected.

    Returns
    -------
    pd.DataFrame
        census household data at 100x100m grid level"""

    # Retrieve information about households for each census cell
    # Only use cell-data which quality (quantity_q<2) is acceptable
    df_census_households_grid = db.select_dataframe(
        sql="""
        SELECT grid_id, attribute, characteristics_code,
         characteristics_text, quantity
        FROM society.egon_destatis_zensus_household_per_ha
        WHERE attribute = 'HHTYP_FAM' AND quantity_q <2"""
    )
    # attribute is constant ('HHTYP_FAM') and characteristics_text is
    # redundant with characteristics_code, so both are dropped
    df_census_households_grid = df_census_households_grid.drop(
        columns=["attribute", "characteristics_text"]
    )

    # Missing data is detected: count, per total-household number, the cells
    # that have an 'INSGESAMT' total but no 'HHTYP_FAM' breakdown at all
    df_missing_data = db.select_dataframe(
        sql="""
        SELECT count(joined.quantity_gesamt) as amount,
        joined.quantity_gesamt as households
        FROM(
            SELECT t2.grid_id, quantity_gesamt, quantity_sum_fam,
             (quantity_gesamt-(case when quantity_sum_fam isnull
             then 0 else quantity_sum_fam end))
             as insgesamt_minus_fam
        FROM (
            SELECT grid_id, SUM(quantity) as quantity_sum_fam
            FROM society.egon_destatis_zensus_household_per_ha
            WHERE attribute = 'HHTYP_FAM'
            GROUP BY grid_id) as t1
        Full JOIN (
            SELECT grid_id, sum(quantity) as quantity_gesamt
            FROM society.egon_destatis_zensus_household_per_ha
            WHERE attribute = 'INSGESAMT'
            GROUP BY grid_id) as t2 ON t1.grid_id = t2.grid_id
        ) as joined
        WHERE quantity_sum_fam isnull
        Group by quantity_gesamt """
    )
    # The individual grid ids of those cells (with their household totals)
    missing_cells = db.select_dataframe(
        sql="""
        SELECT t12.grid_id, t12.quantity
        FROM (
        SELECT t2.grid_id, (case when quantity_sum_fam isnull
         then quantity_gesamt end) as quantity
        FROM (
            SELECT grid_id, SUM(quantity) as quantity_sum_fam
            FROM society.egon_destatis_zensus_household_per_ha
            WHERE attribute = 'HHTYP_FAM'
            GROUP BY grid_id) as t1
        Full JOIN (
            SELECT grid_id, sum(quantity) as quantity_gesamt
            FROM society.egon_destatis_zensus_household_per_ha
            WHERE attribute = 'INSGESAMT'
            GROUP BY grid_id) as t2 ON t1.grid_id = t2.grid_id
        ) as t12
        WHERE quantity is not null"""
    )

    # Missing cells are substituted by average share of cells with same amount
    # of households.
    df_average_split = create_missing_zensus_data(
        df_census_households_grid, df_missing_data, missing_cells
    )

    df_census_households_grid = df_census_households_grid.rename(
        columns={"quantity": "hh_5types"}
    )

    # append the synthesised household rows for the previously missing cells
    df_census_households_grid = pd.concat(
        [df_census_households_grid, df_average_split], ignore_index=True
    )

    # Census cells with nuts3 and nuts1 information
    df_grid_id = db.select_dataframe(
        sql="""
        SELECT pop.grid_id, pop.id as cell_id, pop.population,
         vg250.vg250_nuts3 as nuts3, lan.nuts as nuts1, lan.gen
        FROM
        society.destatis_zensus_population_per_ha_inside_germany as pop
        LEFT JOIN boundaries.egon_map_zensus_vg250 as vg250
        ON (pop.id=vg250.zensus_population_id)
        LEFT JOIN boundaries.vg250_lan as lan
        ON (LEFT(vg250.vg250_nuts3, 3) = lan.nuts)
        WHERE lan.gf = 4 """
    )
    df_grid_id = df_grid_id.drop_duplicates()
    df_grid_id = df_grid_id.reset_index(drop=True)

    # Merge household type and size data with considered (populated) census
    # cells how='right' is used as ids of unpopulated areas are removed
    # by df_grid_id or ancestors. See here:
    # https://github.com/openego/eGon-data/blob/add4944456f22b8873504c5f579b61dca286e357/src/egon/data/datasets/zensus_vg250.py#L269
    df_census_households_grid = pd.merge(
        df_census_households_grid,
        df_grid_id,
        left_on="grid_id",
        right_on="grid_id",
        how="right",
    )
    # deterministic ordering for the downstream random sampling steps
    df_census_households_grid = df_census_households_grid.sort_values(
        ["cell_id", "characteristics_code"]
    )

    return df_census_households_grid
1121
|
|
|
|
1122
|
|
|
|
1123
|
|
|
def proportionate_allocation(
    df_group, dist_households_nuts1, hh_10types_cluster
):
    """Household distribution at nuts1 are applied at census cell within group

    To refine the hh_5types and keep the distribution at nuts1 level,
    the household types are clustered and drawn with proportionate weighting.
    The resulting pool is splitted into subgroups with sizes according to
    the number of households of clusters in cells.

    Parameters
    ----------
    df_group: pd.DataFrame
        Census household data at grid level for specific hh_5type cluster in
        a federal state
    dist_households_nuts1: pd.Series
        Household distribution of of hh_10types in a federal state
    hh_10types_cluster: list of str
        Cluster of household types to be refined to

    Returns
    -------
    pd.DataFrame
        Refined household data with hh_10types of cluster at nuts1 level
    """

    # probability of each fine household type within this cluster
    weights = dist_households_nuts1[hh_10types_cluster].values
    # total number of households of this cluster in the federal state
    total_households = int(df_group["hh_5types"].sum())

    # one weighted random draw for the whole federal state at once
    drawn_types = np.random.choice(
        a=hh_10types_cluster, size=total_households, replace=True, p=weights
    )
    # cumulative cell sizes mark where the state-wide pool is cut into
    # per-cell groups (last boundary omitted: np.split adds the remainder)
    boundaries = df_group["hh_5types"].cumsum().astype(int)[:-1]
    per_cell_samples = np.split(drawn_types, boundaries)
    # count occurrences of each hh_10type per cell
    per_cell_counts = [
        np.unique(sample, return_counts=True) for sample in per_cell_samples
    ]

    df_distribution = pd.DataFrame(
        per_cell_counts, columns=["hh_type", "hh_10types"]
    )
    # attach the cell ids in group order
    df_distribution["cell_id"] = df_group["cell_id"].unique()

    # unnest the (types, counts) arrays into one row per hh_10type and cell
    df_distribution = (
        df_distribution.apply(pd.Series.explode)
        .reset_index(drop=True)
        .dropna()
    )

    return df_distribution
1181
|
|
|
|
1182
|
|
|
|
1183
|
|
|
def refine_census_data_at_cell_level(
    df_census_households_grid,
    df_census_households_nuts1,
):
    """
    Processes and merges census data to specify household numbers and types
    per census cell according to IEE profiles.

    The census data is processed to define the number and type of households
    per zensus cell. Two subsets of the census data are merged to fit the
    IEE profiles specifications. To do this, proportionate allocation is
    applied at nuts1 level and within household type clusters.

    .. list-table:: Mapping table
       :header-rows: 1

       * - characteristics_code
         - characteristics_text
         - mapping
       * - 1
         - Einpersonenhaushalte (Singlehaushalte)
         - SR; SO
       * - 2
         - Paare ohne Kind(er)
         - PR; PO
       * - 3
         - Paare mit Kind(ern)
         - P1; P2; P3
       * - 4
         - Alleinerziehende Elternteile
         - SK
       * - 5
         - Mehrpersonenhaushalte ohne Kernfamilie
         - OR; OO

    Parameters
    ----------
    df_census_households_grid: pd.DataFrame
        Aggregated zensus household data on 100x100m grid level
    df_census_households_nuts1: pd.DataFrame
        Aggregated zensus household data on NUTS-1 level

    Returns
    -------
    pd.DataFrame
        Number of hh types per census cell
    """
    mapping_zensus_hh_subgroups = {
        1: ["SR", "SO"],
        2: ["PR", "PO"],
        3: ["P1", "P2", "P3"],
        4: ["SK"],
        5: ["OR", "OO"],
    }

    # Fraction of each fine household type within its rough subgroup
    df_dist_households = df_census_households_nuts1.copy()
    for subgroup in mapping_zensus_hh_subgroups.values():
        subgroup_totals = df_census_households_nuts1.loc[subgroup].sum()
        df_dist_households.loc[subgroup] = df_census_households_nuts1.loc[
            subgroup
        ].div(subgroup_totals)

    # Refine hh_5types to hh_10types, one federal state at a time
    refined_parts = [pd.DataFrame()]
    for state_name, df_state in df_census_households_grid.groupby("gen"):
        # subgroup distribution of this federal state
        state_distribution = df_dist_households[state_name]

        # loop over hh_5types as cluster
        for code, cluster in mapping_zensus_hh_subgroups.items():
            # census households of this hh_5type in this federal state
            df_cluster = df_state.loc[
                df_state["characteristics_code"] == code
            ]

            # proportionate allocation within the cluster
            df_allocated = proportionate_allocation(
                df_cluster, state_distribution, cluster
            )
            df_allocated["characteristics_code"] = code
            refined_parts.append(df_allocated)

    df_distribution_nuts0 = pd.concat(refined_parts, ignore_index=True)

    # attach the refined hh_10types counts to the grid-level data
    df_census_households_grid_refined = df_census_households_grid.merge(
        df_distribution_nuts0,
        how="inner",
        on=["cell_id", "characteristics_code"],
    )

    # restore integer dtypes lost through the explode/merge steps
    for column in ("characteristics_code", "hh_5types", "hh_10types"):
        df_census_households_grid_refined[column] = (
            df_census_households_grid_refined[column].astype(int)
        )

    return df_census_households_grid_refined
1296
|
|
|
|
1297
|
|
|
|
1298
|
|
|
def get_cell_demand_profile_ids(df_cell, pool_size):
    """
    Generates tuple of hh_type and zensus cell ids

    Takes a random sample of profile ids for given cell:

    * if pool size >= sample size: without replacement
    * if pool size < sample size: with replacement


    Parameters
    ----------
    df_cell: pd.DataFrame
        Household type information for a single zensus cell
    pool_size: int
        Number of available profiles to select from

    Returns
    -------
    list of tuple
        List of (`hh_type`, `cell_id`)

    """
    cell_profile_ids = []
    # one draw per household type present in the cell; sample size is the
    # (refined) number of households of that type
    for hh_type, sample_size in zip(
        df_cell["hh_type"], df_cell["hh_10types"]
    ):
        available = pool_size[hh_type]
        if available >= sample_size:
            # enough distinct profiles: draw without replacement
            drawn = random.sample(range(available), k=sample_size)
        else:
            # pool too small: draw with replacement
            drawn = random.choices(range(available), k=sample_size)
        # flatten directly into (hh_type, profile_id) tuples
        cell_profile_ids.extend(
            (hh_type, profile_id) for profile_id in drawn
        )

    return cell_profile_ids
1344
|
|
|
|
1345
|
|
|
|
1346
|
|
|
# can be parallelized with grouping df_zensus_cells by grid_id/nuts3/nuts1 |
1347
|
|
|
def assign_hh_demand_profiles_to_cells(df_zensus_cells, df_iee_profiles):
    """
    Assign household demand profiles to each census cell.

    A table including the demand profile ids for each cell is created by
    using :func:`get_cell_demand_profile_ids`. Household profiles are
    randomly sampled for each cell. The profiles are not replaced to the
    pool within a cell but after.

    Parameters
    ----------
    df_zensus_cells: pd.DataFrame
        Household type parameters. Each row representing one household.
        Hence, multiple rows per zensus cell.
    df_iee_profiles: pd.DataFrame
        Household load profile data

        * Index: Times steps as serial integers
        * Columns: pd.MultiIndex with (`HH_TYPE`, `id`)

    Returns
    -------
    pd.DataFrame
        Tabular data with one row represents one zensus cell.
        The column `cell_profile_ids` contains
        a list of tuples (see :func:`get_cell_demand_profile_ids`) providing
        a reference to the actual load profiles that are associated with
        this cell.
    """

    # one (initially empty) row per grid cell
    result_columns = [
        "cell_profile_ids",
        "cell_id",
        "nuts3",
        "nuts1",
        "factor_2035",
        "factor_2050",
    ]
    df_hh_profiles_in_census_cells = pd.DataFrame(
        index=df_zensus_cells.grid_id.unique(), columns=result_columns
    ).rename_axis("grid_id")

    # number of available profiles per household type
    pool_size = df_iee_profiles.groupby(level=0, axis=1).size()

    # entries without households carry no profiles; skip them
    nonzero_cells = df_zensus_cells.loc[df_zensus_cells["hh_10types"] != 0]
    for grid_id, df_cell in nonzero_cells.groupby(by="grid_id"):
        # random sampling of household profiles for each cell,
        # with or without replacement (see
        # :func:`get_cell_demand_profile_ids`). As household numbers are
        # rounded to the nearest integer beforehand, this results in a
        # small deviation for the course of the aggregated profiles.
        profile_ids = get_cell_demand_profile_ids(df_cell, pool_size)

        df_hh_profiles_in_census_cells.at[
            grid_id, "cell_profile_ids"
        ] = profile_ids
        # cell_id / nuts3 / nuts1 are constant within one grid cell
        for column in ("cell_id", "nuts3", "nuts1"):
            df_hh_profiles_in_census_cells.at[grid_id, column] = df_cell.loc[
                :, column
            ].unique()[0]

    return df_hh_profiles_in_census_cells
1419
|
|
|
|
1420
|
|
|
|
1421
|
|
|
# can be parallelized with grouping df_zensus_cells by grid_id/nuts3/nuts1 |
1422
|
|
|
def adjust_to_demand_regio_nuts3_annual(
    df_hh_profiles_in_census_cells, df_iee_profiles, df_demand_regio
):
    """
    Computes the profile scaling factor for alignment to demand regio data

    The scaling factor can be used to re-scale each load profile such that
    the sum of all load profiles within one NUTS-3 area equals the annual
    demand of demand regio data.

    Parameters
    ----------
    df_hh_profiles_in_census_cells: pd.DataFrame
        Result of :func:`assign_hh_demand_profiles_to_cells`.
    df_iee_profiles: pd.DataFrame
        Household load profile data

        * Index: Times steps as serial integers
        * Columns: pd.MultiIndex with (`HH_TYPE`, `id`)

    df_demand_regio: pd.DataFrame
        Annual demand by demand regio for each NUTS-3 region and scenario
        year. Index is pd.MultiIndex with
        :code:`tuple(scenario_year, nuts3_code)`.

    Returns
    -------
    pd.DataFrame
        Returns the same data as :func:`assign_hh_demand_profiles_to_cells`,
        but with a `factor_{year}` column filled for every scenario year
        configured in the egon-data settings (e.g. `factor_2035`,
        `factor_2050`).
    """
    for nuts3_id, df_nuts3 in df_hh_profiles_in_census_cells.groupby(
        by="nuts3"
    ):
        nuts3_cell_ids = df_nuts3.index
        # concatenating the per-cell lists of (hh_type, id) tuples via sum()
        # yields all profile ids of this NUTS-3 region
        nuts3_profile_ids = df_nuts3.loc[:, "cell_profile_ids"].sum()

        # take all profiles of one nuts3, aggregate and sum
        # profiles in Wh
        nuts3_profiles_sum_annual = (
            df_iee_profiles.loc[:, nuts3_profile_ids].sum().sum()
        )

        # Scaling Factor
        # ##############
        # demand regio in MWh
        # profiles in Wh

        # one scaling factor per configured scenario year; both sides are
        # converted to kWh (MWh * 1e3, Wh / 1e3), so the factor is unitless
        for scn in egon.data.config.settings()["egon-data"]["--scenarios"]:
            year = get_scenario_year(scn)
            df_hh_profiles_in_census_cells.loc[
                nuts3_cell_ids, f"factor_{year}"
            ] = (
                df_demand_regio.loc[(year, nuts3_id), "demand_mwha"]
                * 1e3
                / (nuts3_profiles_sum_annual / 1e3)
            )

    return df_hh_profiles_in_census_cells
1480
|
|
|
|
1481
|
|
|
|
1482
|
|
|
def get_load_timeseries(
    df_iee_profiles,
    df_hh_profiles_in_census_cells,
    cell_ids,
    year,
    aggregate=True,
    peak_load_only=False,
):
    """
    Get load time series or peak load for one load area in MWh.

    The load is calculated for a group of zensus cells that belong to one
    load area (defined by `cell_ids`). Profiles are scaled with the
    NUTS-3-level factor (column ``factor_<year>``) before summation.

    Parameters
    ----------
    df_iee_profiles: pd.DataFrame
        Household load profile data in Wh

        * Index: Time steps as serial integers
        * Columns: pd.MultiIndex with (`HH_TYPE`, `id`)

        Used to calculate the load from.
    df_hh_profiles_in_census_cells: pd.DataFrame
        Return value of :func:`adjust_to_demand_regio_nuts3_annual`.
    cell_ids: list
        Zensus cell ids that define one group of zensus cells that belong to
        the same load area.
    year: int
        Scenario year. Is used to pick the scaling factor column for aligning
        annual demand to NUTS-3 data.
    aggregate: bool
        If True, all profiles are summed into a single series. If False, one
        column per profile is returned.
    peak_load_only: bool
        If True, only the peak load value is returned (the type of the return
        value is `float`). Defaults to False which returns the entire time
        series as pd.Series.

    Returns
    -------
    pd.Series or pd.DataFrame or float
        Aggregated time series for given `cell_ids` or peak load of this
        time series in MWh.

    Raises
    ------
    KeyError
        If `aggregate` is not a bool.
    """
    # Bug fix: the original validated `aggregate` in an `else` branch after
    # `if aggregate: ... elif not aggregate: ...`, which is exhaustive for
    # truthy/falsy values — the guard was unreachable dead code. Validate
    # up front instead (same exception type/message for compatibility).
    if not isinstance(aggregate, bool):
        raise KeyError("Parameter 'aggregate' needs to be bool value!")

    timesteps = len(df_iee_profiles)
    if aggregate:
        full_load = pd.Series(
            data=np.zeros(timesteps), dtype=np.float64, index=range(timesteps)
        )
    else:
        full_load = pd.DataFrame(index=range(timesteps))

    load_area_meta = df_hh_profiles_in_census_cells.loc[
        cell_ids, ["cell_profile_ids", "nuts3", f"factor_{year}"]
    ]

    # loop over nuts3 (part_load) and sum (full_load) as the scaling factor
    # applies at nuts3 level
    for (nuts3, factor), df in load_area_meta.groupby(
        by=["nuts3", f"factor_{year}"]
    ):
        if aggregate:
            part_load = (
                df_iee_profiles.loc[:, df["cell_profile_ids"].sum()].sum(
                    axis=1
                )
                * factor
                / 1e6
            )  # from Wh to MWh
            full_load = full_load.add(part_load)
        else:
            part_load = (
                df_iee_profiles.loc[:, df["cell_profile_ids"].sum()]
                * factor
                / 1e6
            )  # from Wh to MWh
            full_load = pd.concat([full_load, part_load], axis=1).dropna(
                axis=1
            )

    if peak_load_only:
        full_load = full_load.max()
    return full_load
1564
|
|
|
|
1565
|
|
|
|
1566
|
|
|
def write_refinded_households_to_db(df_census_households_grid_refined):
    """
    Write refined census household allocation data to the database.

    The table mapped by :class:`EgonDestatisZensusHouseholdPerHaRefined`
    is dropped (if present) and recreated before inserting the data, so
    no stale rows survive a re-run.

    Parameters
    ----------
    df_census_households_grid_refined: pd.DataFrame
        Refined census household data at cell level; column names must
        match the table's columns.
    """
    table = EgonDestatisZensusHouseholdPerHaRefined.__table__

    # Recreate the target table from scratch.
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    records = df_census_households_grid_refined.to_dict(orient="records")
    with db.session_scope() as session:
        session.bulk_insert_mappings(
            EgonDestatisZensusHouseholdPerHaRefined, records
        )
1580
|
|
|
|
1581
|
|
|
|
1582
|
|
|
def houseprofiles_in_census_cells():
    """
    Allocate household electricity demand profiles for each census cell.

    Creates table `demand.egon_household_electricity_profile_in_census_cell` that maps
    household electricity demand profiles to census cells. Each row represents one cell
    and contains a list of profile IDs. This table is fundamental
    for creating subsequent data like demand profiles on MV grid level or for
    determining the peak load at load area level.

    Use :func:`get_houseprofiles_in_census_cells` to retrieve the data from
    the database as pandas.

    """

    def gen_profile_names(n):
        """Join from Format (str),(int) to (str)a000(int)"""
        # e.g. ("OR", 12) -> "ORa00012" (id zero-padded to 5 digits)
        a = f"{n[0]}a{int(n[1]):05d}"
        return a

    # Init random generators using global seed so repeated runs allocate
    # identical profile ids to cells
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    # Read demand profiles from egon-data-bundle
    df_iee_profiles = get_iee_hh_demand_profiles_raw()

    # Write raw profiles into db
    write_hh_profiles_to_db(df_iee_profiles)

    # Process profiles for further use
    df_iee_profiles = set_multiindex_to_profiles(df_iee_profiles)

    # Download zensus household NUTS-1 data with family type and age categories
    df_census_households_nuts1_raw = get_census_households_nuts1_raw()

    # Reduce age intervals and remove kids
    df_census_households_nuts1 = process_nuts1_census_data(
        df_census_households_nuts1_raw
    )

    # Regroup data to be compatible with categories from demand profile
    # generator.
    df_census_households_nuts1 = regroup_nuts1_census_data(
        df_census_households_nuts1
    )

    # Convert data from people living in households to households
    # Using a specified amount of inhabitants per household type
    df_census_households_nuts1 = inhabitants_to_households(
        df_census_households_nuts1
    )

    # Query census household grid data with family type
    df_census_households_grid = get_census_households_grid()

    # fill cells with missing household distribution values but population
    # by hh distribution value of random cell with same population value
    df_census_households_grid = impute_missing_hh_in_populated_cells(
        df_census_households_grid
    )

    # Refine census household grid data with additional NUTS-1 level attributes
    df_census_households_grid_refined = refine_census_data_at_cell_level(
        df_census_households_grid, df_census_households_nuts1
    )

    write_refinded_households_to_db(df_census_households_grid_refined)

    # Allocate profile ids to each cell by census data
    df_hh_profiles_in_census_cells = assign_hh_demand_profiles_to_cells(
        df_census_households_grid_refined, df_iee_profiles
    )

    # Annual household electricity demand on NUTS-3 level (demand regio)
    df_demand_regio = db.select_dataframe(
        sql="""
            SELECT year, nuts3, SUM (demand) as demand_mWha
            FROM demand.egon_demandregio_hh as egon_d
            GROUP BY nuts3, year
            ORDER BY year""",
        index_col=["year", "nuts3"],
    )

    # Scale profiles to meet demand regio annual demand projections
    df_hh_profiles_in_census_cells = adjust_to_demand_regio_nuts3_annual(
        df_hh_profiles_in_census_cells, df_iee_profiles, df_demand_regio
    )

    df_hh_profiles_in_census_cells = (
        df_hh_profiles_in_census_cells.reset_index(drop=False)
    )
    df_hh_profiles_in_census_cells["cell_id"] = df_hh_profiles_in_census_cells[
        "cell_id"
    ].astype(int)

    # Cast profile ids back to initial str format
    df_hh_profiles_in_census_cells["cell_profile_ids"] = (
        df_hh_profiles_in_census_cells["cell_profile_ids"].apply(
            lambda x: list(map(gen_profile_names, x))
        )
    )

    # Write allocation table into database (drop + recreate, so re-runs
    # replace previous results rather than appending)
    HouseholdElectricityProfilesInCensusCells.__table__.drop(
        bind=engine, checkfirst=True
    )
    HouseholdElectricityProfilesInCensusCells.__table__.create(
        bind=engine, checkfirst=True
    )

    with db.session_scope() as session:
        session.bulk_insert_mappings(
            HouseholdElectricityProfilesInCensusCells,
            df_hh_profiles_in_census_cells.to_dict(orient="records"),
        )
1698
|
|
|
|
1699
|
|
|
|
1700
|
|
|
def get_houseprofiles_in_census_cells():
    """
    Retrieve household electricity demand profile mapping from database

    See Also
    --------
    :func:`houseprofiles_in_census_cells`

    Returns
    -------
    pd.DataFrame
        Mapping of household demand profiles to zensus cells
    """
    with db.session_scope() as session:
        mapping_query = session.query(
            HouseholdElectricityProfilesInCensusCells
        )

        # Materialize the full table, indexed by census cell id.
        census_profile_mapping = pd.read_sql(
            mapping_query.statement,
            mapping_query.session.bind,
            index_col="cell_id",
        )

    return census_profile_mapping
1721
|
|
|
|
1722
|
|
|
|
1723
|
|
|
def get_cell_demand_metadata_from_db(attribute, list_of_identifiers):
    """
    Retrieve selection of household electricity demand profile mapping

    Parameters
    ----------
    attribute: str
        attribute to filter the table

        * nuts3
        * nuts1
        * cell_id

    list_of_identifiers: list of str/int
        nuts3/nuts1 need to be str
        cell_id need to be int

    See Also
    --------
    :func:`houseprofiles_in_census_cells`

    Returns
    -------
    pd.DataFrame
        Selection of mapping of household demand profiles to zensus cells

    Raises
    ------
    ValueError
        If `attribute` is not one of nuts3/nuts1/cell_id.
    KeyError
        If `list_of_identifiers` is not a list.
    """
    attribute_options = ["nuts3", "nuts1", "cell_id"]
    if attribute not in attribute_options:
        raise ValueError(f"attribute has to be one of: {attribute_options}")

    if not isinstance(list_of_identifiers, list):
        raise KeyError("'list_of_identifiers' is not a list!")

    # All three filter variants select the same columns and differ only in
    # the column the IN-filter applies to, so resolve that column
    # dynamically instead of duplicating the query three times.
    filter_column = getattr(
        HouseholdElectricityProfilesInCensusCells, attribute
    )

    # Query profile ids and scaling factors for specific attributes
    with db.session_scope() as session:
        cells_query = session.query(
            HouseholdElectricityProfilesInCensusCells.cell_id,
            HouseholdElectricityProfilesInCensusCells.cell_profile_ids,
            HouseholdElectricityProfilesInCensusCells.nuts3,
            HouseholdElectricityProfilesInCensusCells.nuts1,
            HouseholdElectricityProfilesInCensusCells.factor_2035,
            HouseholdElectricityProfilesInCensusCells.factor_2050,
        ).filter(filter_column.in_(list_of_identifiers))

    cell_demand_metadata = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col="cell_id"
    )
    return cell_demand_metadata
1802
|
|
|
|
1803
|
|
|
|
1804
|
|
|
def get_hh_profiles_from_db(profile_ids):
    """
    Retrieve selection of household electricity demand profiles

    Parameters
    ----------
    profile_ids: list of str (str, int)
        (type)a00..(profile number) with number having exactly 4 digits

    See Also
    --------
    :func:`houseprofiles_in_census_cells`

    Returns
    -------
    pd.DataFrame
        Selection of household demand profiles
    """
    # Query load profiles matching the requested profile ids
    with db.session_scope() as session:
        load_query = session.query(
            IeeHouseholdLoadProfiles.load_in_wh,
            IeeHouseholdLoadProfiles.type,
        ).filter(IeeHouseholdLoadProfiles.type.in_(profile_ids))

    raw_loads = pd.read_sql(
        load_query.statement, load_query.session.bind, index_col="type"
    )

    # Each row holds one whole load curve as an array; expand into one
    # column per profile with time steps as the row index.
    return pd.DataFrame.from_records(
        raw_loads["load_in_wh"], index=raw_loads.index
    ).T
1839
|
|
|
|
1840
|
|
|
|
1841
|
|
|
def get_demand_regio_hh_profiles_from_db(year):
    """
    Retrieve demand regio household electricity demand profiles in nuts3 level

    Parameters
    ----------
    year: int
        To which year belong the required demand profile

    Returns
    -------
    pd.DataFrame
        Selection of household demand profiles
    """
    # Bug fix: the previous query used "WHERE year = year", which compares
    # the SQL column to itself (always true) and ignored the Python `year`
    # argument entirely, returning rows for every year. Bind the argument
    # as a query parameter (also avoids SQL string interpolation).
    query = """SELECT * FROM demand.demandregio_household_load_profiles
               WHERE year = %(year)s"""

    df_profile_loads = pd.read_sql(
        query, db.engine(), index_col="id", params={"year": year}
    )

    return df_profile_loads
1862
|
|
|
|
1863
|
|
|
|
1864
|
|
|
def mv_grid_district_HH_electricity_load(scenario_name, scenario_year):
    """
    Aggregated household demand time series at HV/MV substation level

    Calculate the aggregated demand time series based on the demand profiles
    of each zensus cell inside each MV grid district. Profiles are read from
    local hdf5-file or demand timeseries per nuts3 in db.
    Creates table `demand.egon_etrago_electricity_households` with
    Household electricity demand profiles aggregated at MV grid district level
    in MWh. Primarily used to create the eTraGo data model.

    Parameters
    ----------
    scenario_name: str
        Scenario name identifier, i.e. "eGon2035"
    scenario_year: int
        Scenario year according to `scenario_name`

    Returns
    -------
    None
        NOTE(review): despite earlier docs claiming a DataFrame return,
        there is no return statement — results are only written to the
        database table via ``to_sql`` at the end.
    """

    def tuple_format(x):
        """Convert Profile ids from string to tuple (type, id)

        Convert from (str)a000(int) to (str), (int)
        """
        # e.g. "ORa00012" -> ("OR", 12)
        return x[:2], int(x[3:])

    # Join the cell->profile mapping with the cell->MV-grid-district
    # mapping so every row also carries its bus_id.
    with db.session_scope() as session:
        cells_query = session.query(
            HouseholdElectricityProfilesInCensusCells,
            MapZensusGridDistricts.bus_id,
        ).join(
            MapZensusGridDistricts,
            HouseholdElectricityProfilesInCensusCells.cell_id
            == MapZensusGridDistricts.zensus_population_id,
        )

    cells = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col="cell_id"
    )

    # CLI setting decides between SLP-based (nuts3 time series scaled by
    # population) and bottom-up (individual IEE profiles) demand source.
    method = egon.data.config.settings()["egon-data"][
        "--household-electrical-demand-source"
    ]

    if method == "slp":
        # Import demand regio timeseries demand per nuts3 area
        dr_series = pd.read_sql_query(
            """
            SELECT year, nuts3, load_in_mwh FROM demand.demandregio_household_load_profiles
            """,
            con=engine,
        )
        dr_series = dr_series[dr_series["year"] == scenario_year]
        dr_series.drop(columns=["year"], inplace=True)
        dr_series.set_index("nuts3", inplace=True)
        dr_series = dr_series.squeeze()

        # Population data per cell_id is used to scale the demand per nuts3
        population = pd.read_sql_query(
            """
            SELECT grid_id, population FROM society.destatis_zensus_population_per_ha
            """,
            con=engine,
        )
        population.set_index("grid_id", inplace=True)
        population = population.squeeze()
        # -1 marks cells without population data; treat them as empty
        population.loc[population == -1] = 0

        cells["population"] = cells["grid_id"].map(population)

        factor_column = f"""factor_{scenario_year}"""

        mvgd_profiles = pd.DataFrame(
            columns=["p_set", "q_set"], index=cells.bus_id.unique()
        )
        mvgd_profiles.index.name = "bus_id"

        # Each cell's share of its NUTS-3 demand is its population share
        for nuts3, df in cells.groupby("nuts3"):
            cells.loc[df.index, factor_column] = (
                df["population"] / df["population"].sum()
            )

        # Sum the scaled NUTS-3 series of all cells attached to each bus.
        # NOTE(review): the series length is hard-coded to 8760 hourly
        # steps (one non-leap year) — confirm dr_series entries match.
        for bus, df_bus in cells.groupby("bus_id"):
            load_nuts = [0] * 8760
            for nuts3, df_nuts in df_bus.groupby("nuts3"):
                factor_nuts = df_nuts[factor_column].sum()
                total_load = [x * factor_nuts for x in dr_series[nuts3]]
                load_nuts = [sum(x) for x in zip(load_nuts, total_load)]
            mvgd_profiles.at[bus, "p_set"] = load_nuts

        mvgd_profiles.reset_index(inplace=True)

    elif method == "bottom-up-profiles":
        # convert profile ids to tuple (type, id) format
        cells["cell_profile_ids"] = cells["cell_profile_ids"].apply(
            lambda x: list(map(tuple_format, x))
        )

        # Read demand profiles from egon-data-bundle
        df_iee_profiles = get_iee_hh_demand_profiles_raw()

        # Process profiles for further use
        df_iee_profiles = set_multiindex_to_profiles(df_iee_profiles)

        # Create aggregated load profile for each MV grid district
        mvgd_profiles_dict = {}
        for grid_district, data in cells.groupby("bus_id"):
            mvgd_profile = get_load_timeseries(
                df_iee_profiles=df_iee_profiles,
                df_hh_profiles_in_census_cells=data,
                cell_ids=data.index,
                year=scenario_year,
                peak_load_only=False,
            )
            # wrap in a list so the whole series fits one DataFrame cell
            mvgd_profiles_dict[grid_district] = [
                mvgd_profile.round(3).to_list()
            ]
        mvgd_profiles = pd.DataFrame.from_dict(
            mvgd_profiles_dict, orient="index"
        )

        # Reshape data: put MV grid ids in columns to a single index column
        mvgd_profiles = mvgd_profiles.reset_index()
        mvgd_profiles.columns = ["bus_id", "p_set"]

    # Add remaining columns
    mvgd_profiles["scn_name"] = scenario_name

    # Insert data into respective database table
    mvgd_profiles.to_sql(
        name=EgonEtragoElectricityHouseholds.__table__.name,
        schema=EgonEtragoElectricityHouseholds.__table__.schema,
        con=engine,
        if_exists="append",
        method="multi",
        chunksize=10000,
        index=False,
    )
2007
|
|
|
|
2008
|
|
|
|
2009
|
|
|
def get_scaled_profiles_from_db(
    attribute, list_of_identifiers, year, aggregate=True, peak_load_only=False
):
    """Retrieve selection of scaled household electricity demand profiles

    Parameters
    ----------
    attribute: str
        attribute to filter the table

        * nuts3
        * nuts1
        * cell_id

    list_of_identifiers: list of str/int
        nuts3/nuts1 need to be str
        cell_id need to be int

    year: int
        * 2035
        * 2050

    aggregate: bool
        If True, all profiles are summed. This uses a lot of RAM if a high
        attribute level is chosen

    peak_load_only: bool
        If True, only peak load value is returned

    Notes
    -----
    Aggregate == False option can use a lot of RAM if many profiles are selected

    Returns
    -------
    pd.Series or float
        Aggregated time series for given `cell_ids` or peak load of this time
        series in MWh.
    """
    # Fetch the cell->profile mapping restricted to the given identifiers.
    metadata = get_cell_demand_metadata_from_db(
        attribute=attribute, list_of_identifiers=list_of_identifiers
    )

    # Concatenating the per-cell lists yields every required profile id.
    all_profile_ids = metadata.cell_profile_ids.sum()
    profiles = get_hh_profiles_from_db(all_profile_ids)

    # Scale and (optionally) aggregate the selected profiles.
    return get_load_timeseries(
        df_iee_profiles=profiles,
        df_hh_profiles_in_census_cells=metadata,
        cell_ids=metadata.index.to_list(),
        year=year,
        aggregate=aggregate,
        peak_load_only=peak_load_only,
    )
2065
|
|
|
|