1
|
|
|
""" |
2
|
|
|
Household electricity demand time series for scenarios eGon2035 and eGon100RE at |
3
|
|
|
census cell level are set up. |
4
|
|
|
|
5
|
|
|
Electricity demand data for households in Germany in 1-hourly resolution for |
6
|
|
|
an entire year. Spatially, the data is resolved to 100 x 100 m cells and |
7
|
|
|
provides individual and distinct time series for each household in a cell. |
8
|
|
|
The cells are defined by the dataset Zensus 2011. |
9
|
|
|
|
10
|
|
|
""" |
11
|
|
|
from itertools import cycle, product |
12
|
|
|
from pathlib import Path |
13
|
|
|
import os |
14
|
|
|
import random |
15
|
|
|
|
16
|
|
|
from airflow.operators.python import PythonOperator |
17
|
|
|
from sqlalchemy import ARRAY, Column, Float, Integer, String |
18
|
|
|
from sqlalchemy.dialects.postgresql import CHAR, INTEGER, REAL |
19
|
|
|
from sqlalchemy.ext.declarative import declarative_base |
20
|
|
|
import numpy as np |
21
|
|
|
import pandas as pd |
22
|
|
|
|
23
|
|
|
from egon.data import db |
24
|
|
|
from egon.data.datasets import Dataset |
25
|
|
|
from egon.data.datasets.scenario_parameters import get_scenario_year |
26
|
|
|
from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts |
27
|
|
|
import egon.data.config |
28
|
|
|
|
29
|
|
|
# Declarative base shared by all ORM table definitions in this module.
Base = declarative_base()
# Shared DB engine, used for table DDL and bulk writes below.
engine = db.engine()


# Get random seed from config
RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"]
35
|
|
|
|
36
|
|
|
|
37
|
|
|
class IeeHouseholdLoadProfiles(Base):
    """
    Class definition of table demand.iee_household_load_profiles.

    One row per IEE demand profile; the annual hourly load time series is
    stored as an array.
    """
    __tablename__ = "iee_household_load_profiles"
    __table_args__ = {"schema": "demand"}

    id = Column(INTEGER, primary_key=True)
    # 8-char profile id, e.g. "O0a00001" (2-letter type + separator + number)
    type = Column(CHAR(8), index=True)
    # annual load profile time series in Wh, one value per timestep
    load_in_wh = Column(ARRAY(REAL))
47
|
|
|
|
48
|
|
|
|
49
|
|
|
class HouseholdElectricityProfilesInCensusCells(Base):
    """
    Class definition of table demand.egon_household_electricity_profile_in_census_cell.

    Lists references and scaling parameters of time series data for each
    household in a cell by identifiers. This table is fundamental for creating
    subsequent data like demand profiles on MV grid level or for determining
    the peak load at load area level.

    """
    __tablename__ = "egon_household_electricity_profile_in_census_cell"
    __table_args__ = {"schema": "demand"}

    # zensus 100 x 100 m cell id
    cell_id = Column(Integer, primary_key=True)
    grid_id = Column(String)
    # ids of the IEE load profiles assigned to the households in this cell
    cell_profile_ids = Column(ARRAY(String, dimensions=1))
    nuts3 = Column(String)
    nuts1 = Column(String)
    # per-scenario-year scaling factors that scale the cell's summed
    # profiles to the demand-regio annual demand at NUTS-3 level
    factor_2019 = Column(Float)
    factor_2023 = Column(Float)
    factor_2035 = Column(Float)
    factor_2050 = Column(Float)
71
|
|
|
|
72
|
|
|
|
73
|
|
|
class EgonDestatisZensusHouseholdPerHaRefined(Base):
    """
    Class definition of table society.egon_destatis_zensus_household_per_ha_refined.

    Zensus household counts per cell, refined from the 5 rough household
    categories into 10 finer categories.
    """
    __tablename__ = "egon_destatis_zensus_household_per_ha_refined"
    __table_args__ = {"schema": "society"}

    id = Column(INTEGER, primary_key=True)
    # zensus 100 x 100 m cell id
    cell_id = Column(Integer, index=True)
    grid_id = Column(String, index=True)
    nuts3 = Column(String)
    nuts1 = Column(String)
    characteristics_code = Column(Integer)
    # number of households in the rough (5-category) classification
    hh_5types = Column(Integer)
    # two-letter refined household type code, e.g. "SR", "SO", "PR"
    hh_type = Column(CHAR(2))
    # number of households in the refined (10-category) classification
    hh_10types = Column(Integer)
89
|
|
|
|
90
|
|
|
|
91
|
|
|
class EgonEtragoElectricityHouseholds(Base):
    """
    Class definition of table demand.egon_etrago_electricity_households.

    The table contains household electricity demand profiles aggregated at MV
    grid district level in MWh.
    """
    __tablename__ = "egon_etrago_electricity_households"
    __table_args__ = {"schema": "demand"}

    # MV grid district bus; composite key together with the scenario name
    bus_id = Column(Integer, primary_key=True)
    scn_name = Column(String, primary_key=True)
    # active power demand time series in MWh
    p_set = Column(ARRAY(Float))
    # reactive power time series
    q_set = Column(ARRAY(Float))
105
|
|
|
|
106
|
|
|
|
107
|
|
|
class HouseholdDemands(Dataset):
    """
    Household electricity demand time series for scenarios eGon2035 and eGon100RE at
    census cell level are set up.

    Electricity demand data for households in Germany in 1-hourly resolution for
    an entire year. Spatially, the data is resolved to 100 x 100 m cells and
    provides individual and distinct time series for each household in a cell.
    The cells are defined by the dataset Zensus 2011.

    *Dependencies*
      * :py:class:`DemandRegio <egon.data.datasets.demandregio.DemandRegio>`
      * :py:class:`Vg250 <egon.data.datasets.vg250.Vg250>`
      * :py:class:`OsmBuildingsStreets <egon.data.datasets.osm_buildings_streets.OsmBuildingsStreets>`
      * :py:func:`create_buildings_residential_zensus_mapping <egon.data.datasets.osm_buildings_streets.create_buildings_residential_zensus_mapping>`
      * :py:class:`ZensusMiscellaneous <egon.data.datasets.zensus.ZensusMiscellaneous>`
      * :py:class:`ZensusMvGridDistricts <egon.data.datasets.zensus_mv_grid_districts.ZensusMvGridDistricts>`
      * :py:class:`ZensusVg250 <egon.data.datasets.zensus_vg250.ZensusVg250>`

    *Resulting tables*
      * :py:class:`demand.iee_household_load_profiles
        <IeeHouseholdLoadProfiles>` is created and filled
      * :py:class:`demand.egon_household_electricity_profile_in_census_cell
        <HouseholdElectricityProfilesInCensusCells>` is created and filled
      * :py:class:`society.egon_destatis_zensus_household_per_ha_refined
        <EgonDestatisZensusHouseholdPerHaRefined>` is created and filled
      * :py:class:`demand.egon_etrago_electricity_households
        <EgonEtragoElectricityHouseholds>` is created and filled

    The following datasets are used for creating the data:

    * Electricity demand time series for household categories
      produced by demand profile generator (DPG) from Fraunhofer IEE
      (see :func:`get_iee_hh_demand_profiles_raw`)
    * Spatial information about people living in households by Zensus 2011 at
      federal state level

        * Type of household (family status)
        * Age
        * Number of people

    * Spatial information about number of households per ha, categorized by type
      of household (family status) with 5 categories (also from Zensus 2011)
    * Demand-Regio annual household demand at NUTS3 level

    *What is the goal?*

    To use the electricity demand time series from the `demand profile generator`
    to create spatially referenced household demand time series for Germany at a
    resolution of 100 x 100 m cells.

    *What is the challenge?*

    The electricity demand time series produced by demand profile generator offer
    12 different household profile categories.
    To use most of them, the spatial information about the number of households
    per cell (5 categories) needs to be enriched by supplementary data to match
    the household demand profile categories specifications. Hence, 10 out of 12
    different household profile categories can be distinguished by increasing
    the number of categories of cell-level household data.

    *How are these datasets combined?*

    * Spatial information about people living in households by zensus (2011) at
      federal state NUTS1 level `df_zensus` is aggregated to be compatible
      to IEE household profile specifications.

        * exclude kids and reduce to adults and seniors
        * group as defined in `HH_TYPES`
        * convert data from people living in households to number of households
          by `mapping_people_in_households`
        * calculate fraction of fine household types (10) within subgroup of rough
          household types (5) `df_dist_households`

    * Spatial information about number of households per ha
      `df_census_households_nuts3` is mapped to NUTS1 and NUTS3 level.
      Data is refined with household subgroups via
      `df_dist_households` to `df_census_households_grid_refined`.
    * Enriched 100 x 100 m household dataset is used to sample and aggregate
      household profiles. A table including individual profile id's for each cell
      and scaling factor to match Demand-Regio annual sum projections for 2035
      and 2050 at NUTS3 level is created in the database as
      `demand.household_electricity_profiles_in_census_cells`.

    *What are central assumptions during the data processing?*

    * Mapping zensus data to IEE household categories is not trivial. In
      conversion from persons in household to number of
      households, number of inhabitants for multi-person households is estimated
      as weighted average in `OO_factor`
    * The distribution to refine household types at cell level are the same for
      each federal state
    * Refining of household types lead to float number of profiles drew at cell
      level and need to be rounded to nearest int by np.rint().
    * 100 x 100 m cells are matched to NUTS via cells centroid location
    * Cells with households in unpopulated areas are removed

    *Drawbacks and limitations of the data*

    * The distribution to refine household types at cell level are the same for
      each federal state
    * Household profiles aggregated annual demand matches Demand Regio demand at
      NUTS-3 level, but it is not matching the demand regio time series profile
    * Due to secrecy, some census data are highly modified under certain attributes
      (quantity_q = 2). This cell data is not corrected, but excluded.
    * There is deviation in the Census data from table to table. The statistical
      methods are not stringent. Hence, there are cases in which data contradicts.
    * Census data with attribute 'HHTYP_FAM' is missing for some cells with small
      amount of households. This data is generated using the average share of
      household types for cells with similar household number. For some cells the
      summed amount of households per type deviates from the total number with
      attribute 'INSGESAMT'. As the profiles are scaled with demand-regio data at
      nuts3-level the impact at a higher aggregation level is negligible.
      For sake of simplicity, the data is not corrected.
    * There are cells without household data but a population. A randomly chosen
      household distribution is taken from a subgroup of cells with same population
      value and applied to all cells with missing household distribution and the
      specific population value.

    *Helper functions*

    * To access the DB, select specific profiles at various aggregation levels
      use :py:func:`get_hh_profiles_from_db`
    * To access the DB, select specific profiles at various aggregation levels
      and scale profiles use :py:func:`get_scaled_profiles_from_db`


    """

    #:
    name: str = "Household Demands"
    #:
    version: str = "0.0.12"

    # Scenario name -> scenario year. For every scenario enabled in the run
    # configuration one MV-grid household load task is appended; the year is
    # also used to build the airflow task id ("MV-hh-electricity-load-<year>").
    _SCENARIO_YEARS = {
        "status2019": 2019,
        "status2023": 2023,
        "eGon2035": 2035,
        "eGon100RE": 2050,
    }

    def __init__(self, dependencies):
        """Assemble the task graph depending on the configured scenarios.

        Parameters
        ----------
        dependencies: list
            Datasets this dataset depends on (passed through to
            :py:class:`Dataset <egon.data.datasets.Dataset>`).
        """
        tasks = (create_table, houseprofiles_in_census_cells,)

        scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]

        # One aggregation task per enabled scenario instead of four
        # copy-pasted branches (the old code also reused one misleading
        # variable name for all of them).
        for scenario, year in self._SCENARIO_YEARS.items():
            if scenario in scenarios:
                mv_hh_electricity_load = PythonOperator(
                    task_id=f"MV-hh-electricity-load-{year}",
                    python_callable=mv_grid_district_HH_electricity_load,
                    op_args=[scenario, year],
                )

                tasks = tasks + (mv_hh_electricity_load,)

        super().__init__(
            name=self.name,
            version=self.version,
            dependencies=dependencies,
            tasks=tasks,
        )
296
|
|
|
|
297
|
|
|
def create_table():
    """Drop and re-create table demand.egon_etrago_electricity_households.

    ``checkfirst=True`` makes both DDL statements no-ops when the table is
    absent (drop) or already present (create), so the call is idempotent.
    """
    EgonEtragoElectricityHouseholds.__table__.drop(
        bind=engine, checkfirst=True
    )
    EgonEtragoElectricityHouseholds.__table__.create(
        bind=engine, checkfirst=True
    )
304
|
|
|
|
305
|
|
|
def clean(x):
    """Clean zensus household data row-wise

    Clean dataset by

    * converting '.' and '-' to str(0)
    * removing brackets

    Table can be converted to int/floats afterwards

    Parameters
    ----------
    x: pd.Series
        It is meant to be used with :code:`df.applymap()`

    Returns
    -------
    pd.Series
        Re-formatted data row
    """
    # Replace each placeholder character with "0", then drop any
    # surrounding brackets.
    cleaned = str(x)
    for placeholder in ("-", "."):
        cleaned = cleaned.replace(placeholder, "0")
    return cleaned.strip("()")
329
|
|
|
|
330
|
|
|
|
331
|
|
|
def write_hh_profiles_to_db(hh_profiles):
    """Write HH demand profiles of IEE into db. One row per profile type.
    The annual load profile timeseries is an array.

    schema: demand
    tablename: iee_household_load_profiles



    Parameters
    ----------
    hh_profiles: pd.DataFrame
        Household demand profiles, one column per profile id and one row
        per timestep.

    Returns
    -------
    """
    # Reshape the wide (timestep x type) table into one row per profile
    # type, collecting the whole annual series into a tuple so it can be
    # stored as a single ARRAY value.
    hh_profiles = hh_profiles.rename_axis("type", axis=1)
    hh_profiles = hh_profiles.rename_axis("timestep", axis=0)
    hh_profiles = hh_profiles.stack().rename("load_in_wh")
    hh_profiles = hh_profiles.to_frame().reset_index()
    hh_profiles = hh_profiles.groupby("type").load_in_wh.apply(tuple)
    hh_profiles = hh_profiles.reset_index()

    # Recreate the target table from scratch before inserting.
    IeeHouseholdLoadProfiles.__table__.drop(bind=engine, checkfirst=True)
    IeeHouseholdLoadProfiles.__table__.create(bind=engine)

    hh_profiles.to_sql(
        name=IeeHouseholdLoadProfiles.__table__.name,
        schema=IeeHouseholdLoadProfiles.__table__.schema,
        con=engine,
        if_exists="append",
        method="multi",
        chunksize=100,
        index=False,
        # Map columns to the ORM column types so the ARRAY/CHAR types are
        # used instead of pandas' inferred SQL types.
        dtype={
            "load_in_wh": IeeHouseholdLoadProfiles.load_in_wh.type,
            "type": IeeHouseholdLoadProfiles.type.type,
            "id": IeeHouseholdLoadProfiles.id.type,
        },
    )
372
|
|
|
|
373
|
|
|
|
374
|
|
|
def get_iee_hh_demand_profiles_raw():
    """Gets and returns household electricity demand profiles from the
    egon-data-bundle.

    Household electricity demand profiles generated by Fraunhofer IEE.
    Methodology is described in
    `Erzeugung zeitlich hochaufgelöster Stromlastprofile für verschiedene Haushaltstypen
    <https://www.researchgate.net/publication/273775902_Erzeugung_zeitlich_hochaufgeloster_Stromlastprofile_fur_verschiedene_Haushaltstypen>`_.
    It is used and further described in the following theses by:

    * Jonas Haack:
      "Auswirkungen verschiedener Haushaltslastprofile auf PV-Batterie-Systeme"
      (confidential)
    * Simon Ruben Drauz
      "Synthesis of a heat and electrical load profile for single and
      multi-family houses used for subsequent performance tests of a
      multi-component energy system",
      http://dx.doi.org/10.13140/RG.2.2.13959.14248

    Notes
    -----
    The household electricity demand profiles have been generated for 2016
    which is a leap year (8784 hours) starting on a Friday. The weather year
    is 2011 and the heat timeseries 2011 are generated for 2011 too (cf.
    dataset :mod:`egon.data.datasets.heat_demand_timeseries.HTS`), having
    8760h and starting on a Saturday. To align the profiles, the first day of
    the IEE profiles are deleted, resulting in 8760h starting on Saturday.

    Returns
    -------
    pd.DataFrame
        Table with profiles in columns and time as index. A pd.MultiIndex is
        used to distinguish load profiles from different EUROSTAT household
        types.

    Raises
    ------
    ValueError
        If the configured ``--dataset-boundary`` is neither "Everything"
        nor "Schleswig-Holstein".
    """
    data_config = egon.data.config.datasets()
    pa_config = data_config["hh_demand_profiles"]

    dataset = egon.data.config.settings()["egon-data"]["--dataset-boundary"]

    # Explicit branching instead of the previous nested conditional
    # expression that raised via a helper inside the expression.
    if dataset == "Everything":
        file_section = "path"
    elif dataset == "Schleswig-Holstein":
        file_section = "path_testmode"
    else:
        raise ValueError(f"'{dataset}' is not a valid dataset boundary.")

    file_path = pa_config["sources"]["household_electricity_demand_profiles"][
        file_section
    ]

    download_directory = os.path.join(
        "data_bundle_egon_data", "household_electricity_demand_profiles"
    )

    hh_profiles_file = (
        Path(".") / Path(download_directory) / Path(file_path).name
    )

    df_hh_profiles = pd.read_hdf(hh_profiles_file)

    # aggregate profile types O2, O1 and O0 as there is no differentiation
    # possible at cell level see :func:`regroup_nuts1_census_data`.
    merge_profiles = [i for i in df_hh_profiles.columns if "O1" in i[:3]]
    merge_profiles += [i for i in df_hh_profiles.columns if "O2" in i[:3]]
    merge_profiles += [i for i in df_hh_profiles.columns if "O0" in i[:3]]
    mapping = {f"{old}": f"O0a{i:05d}" for i, old in enumerate(merge_profiles)}
    df_hh_profiles.rename(columns=mapping, inplace=True)

    return df_hh_profiles
448
|
|
|
|
449
|
|
|
|
450
|
|
|
def set_multiindex_to_profiles(hh_profiles):
    """The profile id is split into type and number and set as multiindex.

    Parameters
    ----------
    hh_profiles: pd.DataFrame
        Profiles
    Returns
    -------
    hh_profiles: pd.DataFrame
        Profiles with Multiindex
    """

    # A profile id such as "O0a00001" consists of a two-character household
    # type, one separator character and a running number. Build the
    # (type, number) pairs directly and install them as a MultiIndex.
    type_number_pairs = [
        (column[:2], int(column[3:])) for column in hh_profiles.columns
    ]
    hh_profiles.columns = pd.MultiIndex.from_tuples(type_number_pairs)

    return hh_profiles
474
|
|
|
|
475
|
|
|
|
476
|
|
|
def get_census_households_nuts1_raw():
    """Get zensus age x household type data from egon-data-bundle

    Dataset about household size with information about the categories:

    * family type
    * age class
    * household size

    for Germany in spatial resolution of federal states NUTS-1.

    Data manually selected and retrieved from:
    https://ergebnisse2011.zensus2022.de/datenbank/online
    For reproducing data selection, please do:

    * Search for: "1000A-3016"
    * or choose topic: "Bevölkerung kompakt"
    * Choose table code: "1000A-3016" with title "Personen: Alter
      (11 Altersklassen) - Größe des privaten Haushalts - Typ des privaten
      Haushalts (nach Familien/Lebensform)"
    * Change setting "GEOLK1" to "Bundesländer (16)"

    Data would be available in higher resolution
    ("Landkreise und kreisfreie Städte (412)"), but only after registration.

    The downloaded file is called 'Zensus2011_Personen.csv'.


    Returns
    -------
    pd.DataFrame
        Pre-processed zensus household data
    """
    data_config = egon.data.config.datasets()
    pa_config = data_config["hh_demand_profiles"]
    file_path = pa_config["sources"]["zensus_household_types"]["path"]

    download_directory = os.path.join(
        "data_bundle_egon_data", "zensus_households"
    )

    households_file = (
        Path(".") / Path(download_directory) / Path(file_path).name
    )

    # skiprows/skipfooter cut the file's header and footer boilerplate;
    # the two leftmost columns form the row index (state, household type)
    # and the top two rows the column header (age class levels).
    households_raw = pd.read_csv(
        households_file,
        sep=";",
        decimal=".",
        skiprows=5,
        skipfooter=7,
        index_col=[0, 1],
        header=[0, 1],
        encoding="latin1",
        engine="python",  # required for skipfooter support
    )

    return households_raw
534
|
|
|
|
535
|
|
|
|
536
|
|
|
def create_missing_zensus_data(
    df_households_typ, df_missing_data, missing_cells
):
    """
    Generate missing data as average share of the household types for cell groups with
    the same amount of households.

    There is missing data for specific attributes in the zensus dataset because
    of secrecy reasons. Some cells with only small amount of households are
    missing with attribute HHTYP_FAM. However the total amount of households
    is known with attribute INSGESAMT. The missing data is generated as average
    share of the household types for cell groups with the same amount of
    households.

    Parameters
    ----------
    df_households_typ: pd.DataFrame
        Zensus households data
    df_missing_data: pd.DataFrame
        number of missing cells of group of amount of households
    missing_cells: dict
        dictionary with list of grids of the missing cells grouped by amount of
        households in cell

    Returns
    ----------
    df_average_split: pd.DataFrame
        generated dataset of missing cells

    """
    # grid_ids of missing cells grouped by amount of households
    missing_grid_ids = {
        group: list(df.grid_id)
        for group, df in missing_cells.groupby("quantity")
    }

    # Grid ids for cells with low household numbers
    df_households_typ = df_households_typ.set_index("grid_id", drop=True)
    hh_in_cells = df_households_typ.groupby("grid_id")["quantity"].sum()
    hh_index = {
        i: hh_in_cells.loc[hh_in_cells == i].index
        for i in df_missing_data.households.values
    }

    # Collect the per-group frames and concatenate once at the end instead
    # of growing a DataFrame with pd.concat inside the loop (quadratic
    # copying in the old implementation).
    generated_frames = []
    for hh_size, index in hh_index.items():
        # average split of household types in cells with low household numbers
        split = (
            df_households_typ.loc[index].groupby("characteristics_code").sum()
            / df_households_typ.loc[index].quantity.sum()
        )
        split = split.quantity * hh_size

        # correct rounding so the rounded split still sums up to hh_size
        # NOTE(review): `sample()` uses the global, unseeded random state
        # even though RANDOM_SEED exists at module level — the correction
        # row may differ between runs; confirm whether this is intended.
        difference = int(split.sum() - split.round().sum())
        if difference > 0:
            # add to any row
            split = split.round()
            random_row = split.sample()
            split[random_row.index] = random_row + difference
        elif difference < 0:
            # subtract only from rows > 0
            split = split.round()
            random_row = split[split > 0].sample()
            split[random_row.index] = random_row + difference
        else:
            split = split.round()

        # Dataframe with average split for each cell; zip pairs each split
        # value with its characteristics code 1..5 positionally.
        temp = pd.DataFrame(
            product(zip(split, range(1, 6)), missing_grid_ids[hh_size]),
            columns=["tuple", "grid_id"],
        )
        temp = pd.DataFrame(temp.tuple.tolist()).join(temp.grid_id)
        temp = temp.rename(columns={0: "hh_5types", 1: "characteristics_code"})
        temp = temp.dropna()
        temp = temp[(temp["hh_5types"] != 0)]
        # append for each cell group of households
        generated_frames.append(temp)

    if not generated_frames:
        # Nothing to fill in — return an empty frame with the expected
        # columns instead of failing on the concat/astype below.
        return pd.DataFrame(
            columns=["hh_5types", "characteristics_code", "grid_id"]
        )

    df_average_split = pd.concat(generated_frames, ignore_index=True)
    df_average_split["hh_5types"] = df_average_split["hh_5types"].astype(int)

    return df_average_split
620
|
|
|
|
621
|
|
|
|
622
|
|
|
def process_nuts1_census_data(df_census_households_raw):
    """Make data compatible with household demand profile categories

    Removes and reorders categories which are not needed to fit data to
    household types of IEE electricity demand time series generated by
    demand-profile-generator (DPG).

    * Kids (<15) are excluded as they are also excluded in DPG origin dataset
    * Adults (15<65)
    * Seniors (<65)

    Parameters
    ----------
    df_census_households_raw: pd.DataFrame
        cleaned zensus household type x age category data

    Returns
    -------
    pd.DataFrame
        Aggregated zensus household data on NUTS-1 level
    """

    # Clean data to int only
    # NOTE(review): DataFrame.applymap and groupby(axis=1) below are
    # deprecated in recent pandas releases — works with the pinned version,
    # revisit on a pandas upgrade.
    df_census_households = df_census_households_raw.applymap(clean).applymap(
        int
    )

    # Group data to fit Load Profile Generator categories
    # define kids/adults/seniors
    kids = ["Unter 3", "3 - 5", "6 - 14"]  # < 15
    adults = [
        "15 - 17",
        "18 - 24",
        "25 - 29",
        "30 - 39",
        "40 - 49",
        "50 - 64",
    ]  # 15 < x <65
    seniors = ["65 - 74", "75 und älter"]  # >65

    # sum groups of kids, adults and seniors and concat
    # (column level 0 is the household type, level 1 the age class)
    df_kids = (
        df_census_households.loc[:, (slice(None), kids)]
        .groupby(level=0, axis=1)
        .sum()
    )
    df_adults = (
        df_census_households.loc[:, (slice(None), adults)]
        .groupby(level=0, axis=1)
        .sum()
    )
    df_seniors = (
        df_census_households.loc[:, (slice(None), seniors)]
        .groupby(level=0, axis=1)
        .sum()
    )
    df_census_households = pd.concat(
        [df_kids, df_adults, df_seniors],
        axis=1,
        keys=["Kids", "Adults", "Seniors"],
        names=["age", "persons"],
    )

    # reduce column names to state only
    # (index labels look like "<code> <state> ..."; keep the second token)
    mapping_state = {
        i: i.split()[1]
        for i in df_census_households.index.get_level_values(level=0)
    }

    # rename index
    df_census_households = df_census_households.rename(
        index=mapping_state, level=0
    )
    # rename axis
    df_census_households = df_census_households.rename_axis(["state", "type"])
    # unstack
    df_census_households = df_census_households.unstack()
    # reorder levels
    df_census_households = df_census_households.reorder_levels(
        order=["type", "persons", "age"], axis=1
    )

    return df_census_households
705
|
|
|
|
706
|
|
|
|
707
|
|
|
def regroup_nuts1_census_data(df_census_households_nuts1):
    """Regroup census data and map according to demand-profile types.

    Maps the census household categories (family type x household size x
    age group) to the Eurostat household types used by the IEE
    demand-profile generator. For more information look at the respective
    publication:
    https://www.researchgate.net/publication/273775902_Erzeugung_zeitlich_hochaufgeloster_Stromlastprofile_fur_verschiedene_Haushaltstypen


    Parameters
    ----------
    df_census_households_nuts1: pd.DataFrame
        census household data on NUTS-1 level in absolute values

    Returns
    -------
    df_hh_distribution_abs: pd.DataFrame
        Distribution of Eurostat household types (rows) per NUTS-1 region
        (columns) in absolute values; all-zero household types are dropped
    """

    # Mapping of census household family types to Eurostat household types
    # - Adults living in households type
    # - kids are not included even if mentioned in household type name
    # **! The Eurostat data only counts adults/seniors, excluding kids <15**
    # Eurostat household types are used for demand-profile-generator
    # @iee-fraunhofer
    # Each value is a list of (family type, size class, age group) column
    # keys of `df_census_households_nuts1` that sum up to one Eurostat type.
    hh_types_eurostat = {
        "SR": [
            ("Einpersonenhaushalte (Singlehaushalte)", "Insgesamt", "Seniors"),
            ("Alleinerziehende Elternteile", "Insgesamt", "Seniors"),
        ],
        # Single Seniors Single Parents Seniors
        "SO": [
            ("Einpersonenhaushalte (Singlehaushalte)", "Insgesamt", "Adults")
        ],  # Single Adults
        "SK": [("Alleinerziehende Elternteile", "Insgesamt", "Adults")],
        # Single Parents Adult
        "PR": [
            ("Paare ohne Kind(er)", "2 Personen", "Seniors"),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "2 Personen",
                "Seniors",
            ),
        ],
        # Couples without Kids Senior & same sex couples & shared flat seniors
        "PO": [
            ("Paare ohne Kind(er)", "2 Personen", "Adults"),
            ("Mehrpersonenhaushalte ohne Kernfamilie", "2 Personen", "Adults"),
        ],
        # Couples without Kids adults & same sex couples & shared flat adults
        "P1": [("Paare mit Kind(ern)", "3 Personen", "Adults")],
        "P2": [("Paare mit Kind(ern)", "4 Personen", "Adults")],
        "P3": [
            ("Paare mit Kind(ern)", "5 Personen", "Adults"),
            ("Paare mit Kind(ern)", "6 und mehr Personen", "Adults"),
        ],
        "OR": [
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "3 Personen",
                "Seniors",
            ),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "4 Personen",
                "Seniors",
            ),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "5 Personen",
                "Seniors",
            ),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "6 und mehr Personen",
                "Seniors",
            ),
            ("Paare mit Kind(ern)", "3 Personen", "Seniors"),
            ("Paare ohne Kind(er)", "3 Personen", "Seniors"),
            ("Paare mit Kind(ern)", "4 Personen", "Seniors"),
            ("Paare ohne Kind(er)", "4 Personen", "Seniors"),
            ("Paare mit Kind(ern)", "5 Personen", "Seniors"),
            ("Paare ohne Kind(er)", "5 Personen", "Seniors"),
            ("Paare mit Kind(ern)", "6 und mehr Personen", "Seniors"),
            ("Paare ohne Kind(er)", "6 und mehr Personen", "Seniors"),
        ],
        # no info about share of kids
        # OO, O1, O2 have the same amount, as no information about the share of
        # kids within census data set.
        "OO": [
            ("Mehrpersonenhaushalte ohne Kernfamilie", "3 Personen", "Adults"),
            ("Mehrpersonenhaushalte ohne Kernfamilie", "4 Personen", "Adults"),
            ("Mehrpersonenhaushalte ohne Kernfamilie", "5 Personen", "Adults"),
            (
                "Mehrpersonenhaushalte ohne Kernfamilie",
                "6 und mehr Personen",
                "Adults",
            ),
            ("Paare ohne Kind(er)", "3 Personen", "Adults"),
            ("Paare ohne Kind(er)", "4 Personen", "Adults"),
            ("Paare ohne Kind(er)", "5 Personen", "Adults"),
            ("Paare ohne Kind(er)", "6 und mehr Personen", "Adults"),
        ],
        # no info about share of kids
    }

    # absolute values
    # For each NUTS-1 region (row of the input), sum the census columns
    # belonging to each Eurostat household type.
    df_hh_distribution_abs = pd.DataFrame(
        (
            {
                hhtype: df_census_households_nuts1.loc[countries, codes].sum()
                for hhtype, codes in hh_types_eurostat.items()
            }
            for countries in df_census_households_nuts1.index
        ),
        index=df_census_households_nuts1.index,
    )
    # drop zero columns
    # Transpose so household types become the row index.
    df_hh_distribution_abs = df_hh_distribution_abs.loc[
        :, (df_hh_distribution_abs != 0).any(axis=0)
    ].T

    return df_hh_distribution_abs
829
|
|
|
|
830
|
|
|
|
831
|
|
|
def inhabitants_to_households(df_hh_people_distribution_abs):
    """
    Convert number of inhabitants to number of household types.

    Takes the distribution of people living in types of households to
    calculate a distribution of household types by using a
    people-in-household mapping. Results are not rounded to int as they are
    only used to calculate a relative distribution anyways.
    The data of category 'HHGROESS_KLASS' in census households
    at grid level is used to determine an average wherever the amount
    of people is not trivial (OR, OO). Kids are not counted.

    Parameters
    ----------
    df_hh_people_distribution_abs: pd.DataFrame
        Grouped census household data on NUTS-1 level in absolute values,
        indexed by Eurostat household-type code (e.g. "SR", "OO")

    Returns
    -------
    df_dist_households: pd.DataFrame
        Distribution of household types
    """

    # Get household size for each census cell grouped by
    # As this is only used to estimate size of households for OR, OO
    # The hh types 1 P and 2 P households are dropped
    df_hh_size = db.select_dataframe(
        sql="""
            SELECT characteristics_text, SUM(quantity) as summe
            FROM society.egon_destatis_zensus_household_per_ha as egon_d
            WHERE attribute = 'HHGROESS_KLASS' AND quantity_q < 2
            GROUP BY characteristics_text """,
        index_col="characteristics_text",
    )
    df_hh_size = df_hh_size.drop(index=["1 Person", "2 Personen"])

    # Define/ estimate number of persons (w/o kids) for each household
    # category. For categories S* and P* it's clear; for multi-person
    # households (OO, OR) the number is estimated as a weighted average.
    # NOTE: SQL GROUP BY does not guarantee row order, so the weights are
    # aligned to the size classes explicitly via reindex instead of relying
    # on positional multiplication with [3, 4, 5, 6].
    persons_per_size_class = {
        "3 Personen": 3,
        "4 Personen": 4,
        "5 Personen": 5,
        "6 und mehr Personen": 6,
    }
    household_counts = df_hh_size["summe"].reindex(
        list(persons_per_size_class)
    )
    OO_factor = (
        household_counts * pd.Series(persons_per_size_class)
    ).sum() / household_counts.sum()

    mapping_people_in_households = {
        "SR": 1,
        "SO": 1,
        "SK": 1,  # kids are excluded
        "PR": 2,
        "PO": 2,
        "P1": 2,  # kids are excluded
        "P2": 2,  # ""
        "P3": 2,  # ""
        "OR": OO_factor,
        "OO": OO_factor,
    }

    # Remove mapping entries without a counterpart in the data.
    # NOTE: the previous implementation took the symmetric difference, so
    # index labels missing from the mapping made the deletion raise a
    # KeyError; only surplus mapping keys are removed here.
    surplus_keys = set(mapping_people_in_households) - set(
        df_hh_people_distribution_abs.index
    )
    if surplus_keys:
        for key in surplus_keys:
            del mapping_people_in_households[key]
        print(f"Removed {surplus_keys} from mapping!")

    # Index labels without a person count cannot be converted; their rows
    # end up as NaN in the division below, so make that visible.
    missing_keys = set(df_hh_people_distribution_abs.index) - set(
        mapping_people_in_households
    )
    if missing_keys:
        print(f"No person count for {missing_keys}; rows will be NaN!")

    # divide amount of people by people in household types
    # pd.Series makes the index-wise alignment of the mapping explicit
    df_dist_households = df_hh_people_distribution_abs.div(
        pd.Series(mapping_people_in_households), axis=0
    )

    return df_dist_households
904
|
|
|
|
905
|
|
|
|
906
|
|
|
def impute_missing_hh_in_populated_cells(df_census_households_grid):
    """
    Fill missing household data in populated cells from a random donor cell
    with the same population value.

    There are cells without household data but with a population. A randomly
    chosen household distribution is taken from a subgroup of cells with the
    same population value and applied to all cells with missing household
    distribution and the specific population value. If there is no subgroup
    with household data for a population value, the fallback is the subgroup
    of the last smaller population value. If no smaller population value
    with household data exists either (i.e. the very first population value
    processed has no donors), the smallest population value with household
    data is used — the previous implementation raised a NameError in this
    case because the fallback had never been assigned.

    Parameters
    ----------
    df_census_households_grid: pd.DataFrame
        census household data at 100x100m grid level; rows containing any
        NaN are treated as cells lacking household data

    Returns
    -------
    pd.DataFrame
        substituted census household data at 100x100m grid level
    """

    # split into cells with and without household data
    df_w_hh = df_census_households_grid.dropna().reset_index(drop=True)
    df_wo_hh = df_census_households_grid.loc[
        df_census_households_grid.isna().any(axis=1)
    ].reset_index(drop=True)

    # donor population value; updated whenever a direct match exists
    fallback_value = None

    # iterate over unique population values in ascending order so the
    # fallback is always the last smaller population value with donors
    for population in df_wo_hh["population"].sort_values().unique():

        if population in df_w_hh["population"].unique():
            fallback_value = population
            population_value = population
        else:
            if fallback_value is None:
                # no smaller donor subgroup exists yet; fall back to the
                # smallest population value that has household data
                fallback_value = df_w_hh["population"].min()
            # use fallback of last possible household distribution
            population_value = fallback_value

        # get cells with the donor population value from cells with
        # household distribution
        df_w_hh_population_i = df_w_hh.loc[
            df_w_hh["population"] == population_value
        ]
        # choose random donor cell within this group
        rnd_cell_id_population_i = np.random.choice(
            df_w_hh_population_i["cell_id"].unique()
        )
        # get household distribution of this donor cell
        df_rand_hh_distribution = df_w_hh_population_i.loc[
            df_w_hh_population_i["cell_id"] == rnd_cell_id_population_i
        ]
        # get cells with specific population value from cells without
        # household distribution
        df_wo_hh_population_i = df_wo_hh.loc[
            df_wo_hh["population"] == population
        ]

        # all cells will get the same random household distribution

        # prepare size of dataframe by number of household types
        df_repeated = pd.concat(
            [df_wo_hh_population_i] * df_rand_hh_distribution.shape[0],
            ignore_index=True,
        )
        df_repeated = df_repeated.sort_values("cell_id").reset_index(drop=True)

        # insert random household distribution
        columns = ["characteristics_code", "hh_5types"]
        df_repeated.loc[:, columns] = pd.concat(
            [df_rand_hh_distribution.loc[:, columns]]
            * df_wo_hh_population_i.shape[0]
        ).values
        # append new cells
        df_w_hh = pd.concat([df_w_hh, df_repeated], ignore_index=True)

    return df_w_hh
982
|
|
|
|
983
|
|
|
|
984
|
|
|
def get_census_households_grid():
    """
    Retrieves and adjusts census household data at 100x100m grid level,
    accounting for missing or divergent data.

    Query census household data at 100x100m grid level from database. As
    there is a divergence in the census household data depending which
    attribute is used. There also exist cells without household but with
    population data. The missing data in these cases are substituted. First
    census household data with attribute 'HHTYP_FAM' is missing for some
    cells with small amount of households. This data is generated using the
    average share of household types for cells with similar household number.
    For some cells the summed amount of households per type deviates from the
    total number with attribute 'INSGESAMT'. As the profiles are scaled with
    demand-regio data at nuts3-level the impact at a higher aggregation level
    is negligible. For sake of simplicity, the data is not corrected.

    Returns
    -------
    pd.DataFrame
        census household data at 100x100m grid level"""

    # Retrieve information about households for each census cell
    # Only use cell-data which quality (quantity_q<2) is acceptable
    df_census_households_grid = db.select_dataframe(
        sql="""
        SELECT grid_id, attribute, characteristics_code,
         characteristics_text, quantity
        FROM society.egon_destatis_zensus_household_per_ha
        WHERE attribute = 'HHTYP_FAM' AND quantity_q <2"""
    )
    # attribute is constant ('HHTYP_FAM') and characteristics_text is
    # redundant to characteristics_code, so both can be dropped
    df_census_households_grid = df_census_households_grid.drop(
        columns=["attribute", "characteristics_text"]
    )

    # Missing data is detected
    # Count, per total-household number, how many cells have an 'INSGESAMT'
    # total but no 'HHTYP_FAM' breakdown at all.
    df_missing_data = db.select_dataframe(
        sql="""
            SELECT count(joined.quantity_gesamt) as amount,
            joined.quantity_gesamt as households
            FROM(
                SELECT t2.grid_id, quantity_gesamt, quantity_sum_fam,
                 (quantity_gesamt-(case when quantity_sum_fam isnull
                 then 0 else quantity_sum_fam end))
                 as insgesamt_minus_fam
            FROM (
                SELECT grid_id, SUM(quantity) as quantity_sum_fam
                FROM society.egon_destatis_zensus_household_per_ha
                WHERE attribute = 'HHTYP_FAM'
                GROUP BY grid_id) as t1
            Full JOIN (
                SELECT grid_id, sum(quantity) as quantity_gesamt
                FROM society.egon_destatis_zensus_household_per_ha
                WHERE attribute = 'INSGESAMT'
                GROUP BY grid_id) as t2 ON t1.grid_id = t2.grid_id
            ) as joined
            WHERE quantity_sum_fam isnull
            Group by quantity_gesamt """
    )
    # The individual grid ids of those cells (with their household totals)
    missing_cells = db.select_dataframe(
        sql="""
            SELECT t12.grid_id, t12.quantity
            FROM (
                SELECT t2.grid_id, (case when quantity_sum_fam isnull
                 then quantity_gesamt end) as quantity
            FROM (
                SELECT grid_id, SUM(quantity) as quantity_sum_fam
                FROM society.egon_destatis_zensus_household_per_ha
                WHERE attribute = 'HHTYP_FAM'
                GROUP BY grid_id) as t1
            Full JOIN (
                SELECT grid_id, sum(quantity) as quantity_gesamt
                FROM society.egon_destatis_zensus_household_per_ha
                WHERE attribute = 'INSGESAMT'
                GROUP BY grid_id) as t2 ON t1.grid_id = t2.grid_id
            ) as t12
            WHERE quantity is not null"""
    )

    # Missing cells are substituted by average share of cells with same amount
    # of households.
    # NOTE(review): create_missing_zensus_data is defined elsewhere in this
    # module — confirm its expected column layout matches the frames above.
    df_average_split = create_missing_zensus_data(
        df_census_households_grid, df_missing_data, missing_cells
    )

    df_census_households_grid = df_census_households_grid.rename(
        columns={"quantity": "hh_5types"}
    )

    # append the synthesized household rows for the formerly missing cells
    df_census_households_grid = pd.concat(
        [df_census_households_grid, df_average_split], ignore_index=True
    )

    # Census cells with nuts3 and nuts1 information
    df_grid_id = db.select_dataframe(
        sql="""
            SELECT pop.grid_id, pop.id as cell_id, pop.population,
             vg250.vg250_nuts3 as nuts3, lan.nuts as nuts1, lan.gen
            FROM
            society.destatis_zensus_population_per_ha_inside_germany as pop
            LEFT JOIN boundaries.egon_map_zensus_vg250 as vg250
            ON (pop.id=vg250.zensus_population_id)
            LEFT JOIN boundaries.vg250_lan as lan
            ON (LEFT(vg250.vg250_nuts3, 3) = lan.nuts)
            WHERE lan.gf = 4 """
    )
    df_grid_id = df_grid_id.drop_duplicates()
    df_grid_id = df_grid_id.reset_index(drop=True)

    # Merge household type and size data with considered (populated) census
    # cells how='right' is used as ids of unpopulated areas are removed
    # by df_grid_id or ancestors. See here:
    # https://github.com/openego/eGon-data/blob/add4944456f22b8873504c5f579b61dca286e357/src/egon/data/datasets/zensus_vg250.py#L269
    df_census_households_grid = pd.merge(
        df_census_households_grid,
        df_grid_id,
        left_on="grid_id",
        right_on="grid_id",
        how="right",
    )
    # deterministic row order for downstream processing
    df_census_households_grid = df_census_households_grid.sort_values(
        ["cell_id", "characteristics_code"]
    )

    return df_census_households_grid
1109
|
|
|
|
1110
|
|
|
|
1111
|
|
|
def proportionate_allocation(
    df_group, dist_households_nuts1, hh_10types_cluster
):
    """Apply the NUTS-1 household distribution to census cells of a group.

    To refine hh_5types while keeping the distribution at NUTS-1 level, a
    pool of fine household types is drawn for the whole federal state with
    probabilities proportional to the NUTS-1 distribution. The pool is then
    cut into consecutive sections — one per census cell — whose sizes equal
    the number of households of the cluster in the respective cell.

    Parameters
    ----------
    df_group: pd.DataFrame
        Census household data at grid level for specific hh_5type cluster in
        a federal state
    dist_households_nuts1: pd.Series
        Household distribution of hh_10types in a federal state
    hh_10types_cluster: list of str
        Cluster of household types to be refined to

    Returns
    -------
    pd.DataFrame
        Refined household data with hh_10types of cluster at nuts1 level
    """
    # selection probabilities of the fine household types in this cluster
    type_probabilities = dist_households_nuts1[hh_10types_cluster].values
    # total number of households of this cluster in the federal state
    n_households = df_group["hh_5types"].sum().astype(int)

    # draw the state-wide pool in one weighted random sample
    pool = np.random.choice(
        a=hh_10types_cluster,
        size=n_households,
        replace=True,
        p=type_probabilities,
    )

    # cut positions: each cell receives as many draws as it has households
    cut_positions = df_group["hh_5types"].cumsum().astype(int)[:-1]
    per_cell_draws = np.split(pool, cut_positions)
    # tally the drawn fine household types per cell
    tallies = [
        np.unique(cell_draws, return_counts=True)
        for cell_draws in per_cell_draws
    ]

    df_distribution = pd.DataFrame(
        tallies, columns=["hh_type", "hh_10types"]
    )
    # attach the originating cell ids (order follows df_group)
    df_distribution["cell_id"] = df_group["cell_id"].unique()

    # unnest to one row per (cell, hh_type); cells without draws vanish
    df_distribution = df_distribution.apply(pd.Series.explode)
    df_distribution = df_distribution.reset_index(drop=True).dropna()

    return df_distribution
1169
|
|
|
|
1170
|
|
|
|
1171
|
|
|
def refine_census_data_at_cell_level(
    df_census_households_grid,
    df_census_households_nuts1,
):
    """
    Processes and merges census data to specify household numbers and types
    per census cell according to IEE profiles.

    The census data is processed to define the number and type of households
    per zensus cell. Two subsets of the census data are merged to fit the
    IEE profiles specifications. To do this, proportionate allocation is
    applied at nuts1 level and within household type clusters.

    .. list-table:: Mapping table
       :header-rows: 1

       * - characteristics_code
         - characteristics_text
         - mapping
       * - 1
         - Einpersonenhaushalte (Singlehaushalte)
         - SR; SO
       * - 2
         - Paare ohne Kind(er)
         - PR; PO
       * - 3
         - Paare mit Kind(ern)
         - P1; P2; P3
       * - 4
         - Alleinerziehende Elternteile
         - SK
       * - 5
         - Mehrpersonenhaushalte ohne Kernfamilie
         - OR; OO

    Parameters
    ----------
    df_census_households_grid: pd.DataFrame
        Aggregated zensus household data on 100x100m grid level
    df_census_households_nuts1: pd.DataFrame
        Aggregated zensus household data on NUTS-1 level

    Returns
    -------
    pd.DataFrame
        Number of hh types per census cell
    """
    mapping_zensus_hh_subgroups = {
        1: ["SR", "SO"],
        2: ["PR", "PO"],
        3: ["P1", "P2", "P3"],
        4: ["SK"],
        5: ["OR", "OO"],
    }

    # Calculate fraction of fine household types within subgroup of
    # rough household types
    df_dist_households = df_census_households_nuts1.copy()
    for subgroup in mapping_zensus_hh_subgroups.values():
        df_dist_households.loc[subgroup] = df_census_households_nuts1.loc[
            subgroup
        ].div(df_census_households_nuts1.loc[subgroup].sum())

    # Refine from hh_5types to hh_10types
    # Collect all per-cluster results in a list and concatenate once at the
    # end; repeatedly concatenating inside the loops grows quadratically.
    refined_parts = []
    # Loop over federal states
    for gen, df_nuts1 in df_census_households_grid.groupby("gen"):
        # take subgroup distribution from federal state
        dist_households_nuts1 = df_dist_households[gen]

        # loop over hh_5types as cluster
        for (
            hh_5type_cluster,
            hh_10types_cluster,
        ) in mapping_zensus_hh_subgroups.items():
            # get census household of hh_5type and federal state
            df_group = df_nuts1.loc[
                df_nuts1["characteristics_code"] == hh_5type_cluster
            ]

            # apply proportionate allocation function within cluster
            df_distribution_group = proportionate_allocation(
                df_group, dist_households_nuts1, hh_10types_cluster
            )
            df_distribution_group["characteristics_code"] = hh_5type_cluster
            refined_parts.append(df_distribution_group)

    df_distribution_nuts0 = (
        pd.concat(refined_parts, ignore_index=True)
        if refined_parts
        else pd.DataFrame()
    )

    # attach the refined hh_10types counts to the grid-level data
    df_census_households_grid_refined = df_census_households_grid.merge(
        df_distribution_nuts0,
        how="inner",
        on=["cell_id", "characteristics_code"],
    )

    # enforce integer dtypes in a single pass
    df_census_households_grid_refined = (
        df_census_households_grid_refined.astype(
            {
                "characteristics_code": int,
                "hh_5types": int,
                "hh_10types": int,
            }
        )
    )

    return df_census_households_grid_refined
1284
|
|
|
|
1285
|
|
|
|
1286
|
|
|
def get_cell_demand_profile_ids(df_cell, pool_size):
    """
    Draw random profile ids per household type for one zensus cell.

    Takes a random sample of profile ids for the given cell:

    * if pool size >= sample size: without replacement
    * if pool size < sample size: with replacement

    Parameters
    ----------
    df_cell: pd.DataFrame
        Household type information for a single zensus cell
    pool_size: int
        Number of available profiles to select from

    Returns
    -------
    list of tuple
        One (`hh_type`, profile id) tuple per sampled household

    """
    # maybe use instead
    # np.random.default_rng().integers(low=0, high=pool_size[hh_type], size=sq)
    cell_profile_ids = []
    for hh_type, sample_size in zip(df_cell["hh_type"], df_cell["hh_10types"]):
        available = pool_size[hh_type]
        if available >= sample_size:
            # enough distinct profiles: draw without replacement
            drawn = random.sample(range(available), k=sample_size)
        else:
            # pool too small: draw with replacement
            drawn = random.choices(range(available), k=sample_size)
        # pair every drawn profile id with its household type
        cell_profile_ids.extend((hh_type, profile_id) for profile_id in drawn)

    return cell_profile_ids
1330
|
|
|
|
1331
|
|
|
|
1332
|
|
|
# can be parallelized with grouping df_zensus_cells by grid_id/nuts3/nuts1 |
1333
|
|
|
def assign_hh_demand_profiles_to_cells(df_zensus_cells, df_iee_profiles):
    """
    Assign household demand profiles to each census cell.

    A table including the demand profile ids for each cell is created by using
    :func:`get_cell_demand_profile_ids`. Household profiles are randomly
    sampled for each cell. The profiles are not replaced to the pool within
    a cell but after.

    Parameters
    ----------
    df_zensus_cells: pd.DataFrame
        Household type parameters. Each row representing one household. Hence,
        multiple rows per zensus cell.
    df_iee_profiles: pd.DataFrame
        Household load profile data

        * Index: Times steps as serial integers
        * Columns: pd.MultiIndex with (`HH_TYPE`, `id`)

    Returns
    -------
    pd.DataFrame
        Tabular data with one row represents one zensus cell.
        The column `cell_profile_ids` contains
        a list of tuples (see :func:`get_cell_demand_profile_ids`) providing a
        reference to the actual load profiles that are associated with this
        cell.
    """

    # One row per grid cell; the factor_* columns are created empty here and
    # presumably filled later by adjust_to_demand_regio_nuts3_annual — the
    # scenario years are hard-coded, verify against configured scenarios.
    df_hh_profiles_in_census_cells = pd.DataFrame(
        index=df_zensus_cells.grid_id.unique(),
        columns=[
            "cell_profile_ids",
            "cell_id",
            "nuts3",
            "nuts1",
            "factor_2035",
            "factor_2050",
        ],
    )

    df_hh_profiles_in_census_cells = (
        df_hh_profiles_in_census_cells.rename_axis("grid_id")
    )

    # number of available profiles per household type (pool to sample from)
    # NOTE(review): groupby(..., axis=1) is deprecated in newer pandas —
    # confirm the pinned pandas version still supports it.
    pool_size = df_iee_profiles.groupby(level=0, axis=1).size()

    # only use non zero entries
    df_zensus_cells = df_zensus_cells.loc[df_zensus_cells["hh_10types"] != 0]
    for grid_id, df_cell in df_zensus_cells.groupby(by="grid_id"):
        # random sampling of household profiles for each cell
        # with or without replacement (see :func:`get_cell_demand_profile_ids`)
        # within cell but after number of households are rounded to the nearest
        # integer if float this results in a small deviation for the course of
        # the aggregated profiles.
        cell_profile_ids = get_cell_demand_profile_ids(df_cell, pool_size)

        # all rows of one grid cell share cell_id/nuts3/nuts1, so taking the
        # first unique value is sufficient
        df_hh_profiles_in_census_cells.at[grid_id, "cell_id"] = df_cell.loc[
            :, "cell_id"
        ].unique()[0]
        df_hh_profiles_in_census_cells.at[
            grid_id, "cell_profile_ids"
        ] = cell_profile_ids
        df_hh_profiles_in_census_cells.at[grid_id, "nuts3"] = df_cell.loc[
            :, "nuts3"
        ].unique()[0]
        df_hh_profiles_in_census_cells.at[grid_id, "nuts1"] = df_cell.loc[
            :, "nuts1"
        ].unique()[0]

    return df_hh_profiles_in_census_cells
1405
|
|
|
|
1406
|
|
|
|
1407
|
|
|
# can be parallelized with grouping df_zensus_cells by grid_id/nuts3/nuts1 |
1408
|
|
|
def adjust_to_demand_regio_nuts3_annual(
    df_hh_profiles_in_census_cells, df_iee_profiles, df_demand_regio
):
    """
    Computes the profile scaling factor for alignment to demand regio data

    The scaling factor can be used to re-scale each load profile such that the
    sum of all load profiles within one NUTS-3 area equals the annual demand
    of demand regio data.

    Parameters
    ----------
    df_hh_profiles_in_census_cells: pd.DataFrame
        Result of :func:`assign_hh_demand_profiles_to_cells`.
    df_iee_profiles: pd.DataFrame
        Household load profile data

        * Index: Times steps as serial integers
        * Columns: pd.MultiIndex with (`HH_TYPE`, `id`)

    df_demand_regio: pd.DataFrame
        Annual demand by demand regio for each NUTS-3 region and scenario year.
        Index is pd.MultiIndex with :code:`tuple(scenario_year, nuts3_code)`.

    Returns
    -------
    pd.DataFrame
        Returns the same data as :func:`assign_hh_demand_profiles_to_cells`,
        but with a filled column `factor_{year}` for every configured
        scenario year (e.g. `factor_2035`, `factor_2050`).
    """
    # hoist the config lookup out of the NUTS-3 loop; it is invariant
    scenarios = egon.data.config.settings()["egon-data"]["--scenarios"]

    for nuts3_id, df_nuts3 in df_hh_profiles_in_census_cells.groupby(
        by="nuts3"
    ):
        nuts3_cell_ids = df_nuts3.index
        # Flatten all per-cell profile-id lists of this NUTS-3 region.
        # A comprehension avoids the quadratic cost of concatenating
        # Python lists via Series.sum().
        nuts3_profile_ids = [
            profile_id
            for cell_ids in df_nuts3["cell_profile_ids"]
            for profile_id in cell_ids
        ]

        # take all profiles of one nuts3, aggregate and sum
        # profiles in Wh
        nuts3_profiles_sum_annual = (
            df_iee_profiles.loc[:, nuts3_profile_ids].sum().sum()
        )

        # Scaling Factor
        # ##############
        # demand regio in MWh
        # profiles in Wh
        for scn in scenarios:
            year = get_scenario_year(scn)
            # MWh -> kWh on the demand side, Wh -> kWh on the profile side
            df_hh_profiles_in_census_cells.loc[
                nuts3_cell_ids, f"factor_{year}"
            ] = (
                df_demand_regio.loc[(year, nuts3_id), "demand_mwha"]
                * 1e3
                / (nuts3_profiles_sum_annual / 1e3)
            )

    return df_hh_profiles_in_census_cells
1466
|
|
|
|
1467
|
|
|
|
1468
|
|
|
def get_load_timeseries(
    df_iee_profiles,
    df_hh_profiles_in_census_cells,
    cell_ids,
    year,
    aggregate=True,
    peak_load_only=False,
):
    """
    Get the load time series (or peak load) for one load area in MWh.

    The load is calculated for a group of zensus cells that belong to one
    load area (defined by `cell_ids`). Each cell's profiles are scaled by
    the NUTS-3 scaling factor of the given scenario year.

    Parameters
    ----------
    df_iee_profiles: pd.DataFrame
        Household load profile data in Wh

        * Index: Times steps as serial integers
        * Columns: pd.MultiIndex with (`HH_TYPE`, `id`)

        Used to calculate the peak load from.
    df_hh_profiles_in_census_cells: pd.DataFrame
        Return value of :func:`adjust_to_demand_regio_nuts3_annual`.
    cell_ids: list
        Zensus cell ids that define one group of zensus cells that belong to
        the same load area.
    year: int
        Scenario year. Is used to consider the scaling factor for aligning
        annual demand to NUTS-3 data.
    aggregate: bool
        If True, all profiles are summed into a single time series.
        If False, individual (scaled) profiles are returned side by side.
    peak_load_only: bool
        If True, only the peak load value is returned (the type of the
        return value is `float`). Defaults to False which returns the
        entire time series as pd.Series.

    Returns
    -------
    pd.Series or float
        Aggregated time series for given `cell_ids` or peak load of this
        time series in MWh.
    """
    timesteps = len(df_iee_profiles)

    # Accumulator: a zero Series when summing, an empty frame when
    # collecting individual profiles.
    if aggregate:
        full_load = pd.Series(
            data=np.zeros(timesteps), dtype=np.float64, index=range(timesteps)
        )
    else:
        full_load = pd.DataFrame(index=range(timesteps))

    load_area_meta = df_hh_profiles_in_census_cells.loc[
        cell_ids, ["cell_profile_ids", "nuts3", f"factor_{year}"]
    ]

    # Loop over nuts3 (part_load) and sum (full_load) as the scaling factor
    # applies at nuts3 level.
    # NOTE: the original code had a third `else: raise KeyError(...)` branch
    # here which was unreachable — any value of `aggregate` satisfies either
    # `if aggregate` or `elif not aggregate` — so it has been removed
    # without changing behavior.
    for (nuts3, factor), df in load_area_meta.groupby(
        by=["nuts3", f"factor_{year}"]
    ):
        # Summing a Series of lists concatenates them into one list of
        # profile ids for this nuts3/factor group.
        profile_ids = df["cell_profile_ids"].sum()
        if aggregate:
            # Sum all group profiles, then scale from Wh to MWh
            part_load = (
                df_iee_profiles.loc[:, profile_ids].sum(axis=1)
                * factor
                / 1e6
            )
            full_load = full_load.add(part_load)
        else:
            # Keep individual profiles, scaled from Wh to MWh
            part_load = df_iee_profiles.loc[:, profile_ids] * factor / 1e6
            full_load = pd.concat([full_load, part_load], axis=1).dropna(
                axis=1
            )

    if peak_load_only:
        full_load = full_load.max()
    return full_load
1550
|
|
|
|
1551
|
|
|
|
1552
|
|
|
def write_refinded_households_to_db(df_census_households_grid_refined):
    """Recreate the refined census household table and bulk-insert the data.

    The target table is dropped and created from scratch, so any previous
    contents are discarded before the new allocation is written.
    """
    # Recreate the target table so previous runs leave no stale rows behind.
    table = EgonDestatisZensusHouseholdPerHaRefined.__table__
    table.drop(bind=engine, checkfirst=True)
    table.create(bind=engine, checkfirst=True)

    records = df_census_households_grid_refined.to_dict(orient="records")
    with db.session_scope() as session:
        session.bulk_insert_mappings(
            EgonDestatisZensusHouseholdPerHaRefined, records
        )
1566
|
|
|
|
1567
|
|
|
|
1568
|
|
|
def houseprofiles_in_census_cells():
    """
    Allocate household electricity demand profiles for each census cell.

    Creates table `demand.egon_household_electricity_profile_in_census_cell`
    that maps household electricity demand profiles to census cells. Each row
    represents one cell and contains a list of profile IDs. This table is
    fundamental for creating subsequent data like demand profiles on MV grid
    level or for determining the peak load at load area level.

    Use :func:`get_houseprofiles_in_census_cells` to retrieve the data from
    the database as pandas.

    """

    def gen_profile_names(n):
        """Join from Format (str),(int) to (str)a000(int)"""
        a = f"{n[0]}a{int(n[1]):05d}"
        return a

    # Init random generators using global seed so the cell/profile
    # allocation is reproducible across runs
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    # Read demand profiles from egon-data-bundle
    df_iee_profiles = get_iee_hh_demand_profiles_raw()

    # Write raw profiles into db
    write_hh_profiles_to_db(df_iee_profiles)

    # Process profiles for further use
    df_iee_profiles = set_multiindex_to_profiles(df_iee_profiles)

    # Download zensus household NUTS-1 data with family type and age
    # categories
    df_census_households_nuts1_raw = get_census_households_nuts1_raw()

    # Reduce age intervals and remove kids
    df_census_households_nuts1 = process_nuts1_census_data(
        df_census_households_nuts1_raw
    )

    # Regroup data to be compatible with categories from demand profile
    # generator.
    df_census_households_nuts1 = regroup_nuts1_census_data(
        df_census_households_nuts1
    )

    # Convert data from people living in households to households
    # Using a specified amount of inhabitants per household type
    df_census_households_nuts1 = inhabitants_to_households(
        df_census_households_nuts1
    )

    # Query census household grid data with family type
    df_census_households_grid = get_census_households_grid()

    # Fill cells with missing household distribution values but population
    # by hh distribution value of random cell with same population value
    df_census_households_grid = impute_missing_hh_in_populated_cells(
        df_census_households_grid
    )

    # Refine census household grid data with additional NUTS-1 level
    # attributes
    df_census_households_grid_refined = refine_census_data_at_cell_level(
        df_census_households_grid, df_census_households_nuts1
    )

    write_refinded_households_to_db(df_census_households_grid_refined)

    # Allocate profile ids to each cell by census data
    df_hh_profiles_in_census_cells = assign_hh_demand_profiles_to_cells(
        df_census_households_grid_refined, df_iee_profiles
    )

    # Annual household electricity demand on NUTS-3 level (demand regio)
    df_demand_regio = db.select_dataframe(
        sql="""
            SELECT year, nuts3, SUM (demand) as demand_mWha
            FROM demand.egon_demandregio_hh as egon_d
            GROUP BY nuts3, year
            ORDER BY year""",
        index_col=["year", "nuts3"],
    )

    # Scale profiles to meet demand regio annual demand projections
    df_hh_profiles_in_census_cells = adjust_to_demand_regio_nuts3_annual(
        df_hh_profiles_in_census_cells, df_iee_profiles, df_demand_regio
    )

    df_hh_profiles_in_census_cells = (
        df_hh_profiles_in_census_cells.reset_index(drop=False)
    )
    # Ensure integer cell ids for the database primary-key column
    df_hh_profiles_in_census_cells["cell_id"] = df_hh_profiles_in_census_cells[
        "cell_id"
    ].astype(int)

    # Cast profile ids back to initial str format
    df_hh_profiles_in_census_cells[
        "cell_profile_ids"
    ] = df_hh_profiles_in_census_cells["cell_profile_ids"].apply(
        lambda x: list(map(gen_profile_names, x))
    )

    # Write allocation table into database (drop + create to start clean)
    HouseholdElectricityProfilesInCensusCells.__table__.drop(
        bind=engine, checkfirst=True
    )
    HouseholdElectricityProfilesInCensusCells.__table__.create(
        bind=engine, checkfirst=True
    )

    with db.session_scope() as session:
        session.bulk_insert_mappings(
            HouseholdElectricityProfilesInCensusCells,
            df_hh_profiles_in_census_cells.to_dict(orient="records"),
        )
1684
|
|
|
|
1685
|
|
|
|
1686
|
|
|
def get_houseprofiles_in_census_cells():
    """
    Retrieve household electricity demand profile mapping from database.

    See Also
    --------
    :func:`houseprofiles_in_census_cells`

    Returns
    -------
    pd.DataFrame
        Mapping of household demand profiles to zensus cells
    """
    # Read the whole allocation table, indexed by census cell id.
    with db.session_scope() as session:
        query = session.query(HouseholdElectricityProfilesInCensusCells)
        mapping = pd.read_sql(
            query.statement, query.session.bind, index_col="cell_id"
        )
    return mapping
1707
|
|
|
|
1708
|
|
|
|
1709
|
|
|
def get_cell_demand_metadata_from_db(attribute, list_of_identifiers):
    """
    Retrieve selection of household electricity demand profile mapping

    Parameters
    ----------
    attribute: str
        attribute to filter the table

        * nuts3
        * nuts1
        * cell_id

    list_of_identifiers: list of str/int
        nuts3/nuts1 need to be str
        cell_id need to be int

    Raises
    ------
    ValueError
        If `attribute` is not one of the supported options.
    KeyError
        If `list_of_identifiers` is not a list.

    See Also
    --------
    :func:`houseprofiles_in_census_cells`

    Returns
    -------
    pd.DataFrame
        Selection of mapping of household demand profiles to zensus cells
    """
    attribute_options = ["nuts3", "nuts1", "cell_id"]
    if attribute not in attribute_options:
        raise ValueError(f"attribute has to be one of: {attribute_options}")

    if not isinstance(list_of_identifiers, list):
        raise KeyError("'list_of_identifiers' is not a list!")

    # The original implementation repeated the identical query in three
    # branches that differed only in the filtered column. Resolve that
    # column dynamically instead; `attribute` is validated above, so the
    # getattr cannot hit an unexpected name.
    filter_column = getattr(
        HouseholdElectricityProfilesInCensusCells, attribute
    )

    # Query profile ids and scaling factors for specific attributes
    with db.session_scope() as session:
        cells_query = session.query(
            HouseholdElectricityProfilesInCensusCells.cell_id,
            HouseholdElectricityProfilesInCensusCells.cell_profile_ids,
            HouseholdElectricityProfilesInCensusCells.nuts3,
            HouseholdElectricityProfilesInCensusCells.nuts1,
            HouseholdElectricityProfilesInCensusCells.factor_2035,
            HouseholdElectricityProfilesInCensusCells.factor_2050,
        ).filter(filter_column.in_(list_of_identifiers))

    cell_demand_metadata = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col="cell_id"
    )
    return cell_demand_metadata
1788
|
|
|
|
1789
|
|
|
|
1790
|
|
|
def get_hh_profiles_from_db(profile_ids):
    """
    Retrieve selection of household electricity demand profiles.

    Parameters
    ----------
    profile_ids: list of str (str, int)
        (type)a00..(profile number) with number having exactly 4 digits

    See Also
    --------
    :func:`houseprofiles_in_census_cells`

    Returns
    -------
    pd.DataFrame
        Selection of household demand profiles
    """
    # Fetch raw load arrays for the requested profile types
    with db.session_scope() as session:
        query = session.query(
            IeeHouseholdLoadProfiles.load_in_wh, IeeHouseholdLoadProfiles.type
        ).filter(IeeHouseholdLoadProfiles.type.in_(profile_ids))

    loads = pd.read_sql(
        query.statement, query.session.bind, index_col="type"
    )

    # Expand the per-profile arrays so each profile becomes one column
    # (time steps down the rows)
    return pd.DataFrame.from_records(
        loads["load_in_wh"], index=loads.index
    ).T
1825
|
|
|
|
1826
|
|
|
def get_demand_regio_hh_profiles_from_db(year):
    """
    Retrieve demand regio household electricity demand profiles in nuts3
    level.

    Parameters
    ----------
    year: int
        Year the requested demand profiles belong to.

    Returns
    -------
    pd.DataFrame
        Selection of household demand profiles for the given year,
        indexed by `id`.
    """
    # BUG FIX: the previous query read "WHERE year = year", which compares
    # the column to itself — always true — so the Python `year` argument
    # was silently ignored and profiles for *all* years were returned.
    # Bind the argument as a query parameter instead.
    query = """
        SELECT * FROM demand.demandregio_household_load_profiles
        WHERE year = %(year)s
    """

    df_profile_loads = pd.read_sql(
        query, db.engine(), index_col="id", params={"year": year}
    )

    return df_profile_loads
1849
|
|
|
|
1850
|
|
|
def mv_grid_district_HH_electricity_load(scenario_name, scenario_year):
    """
    Aggregated household demand time series at HV/MV substation level

    Calculate the aggregated demand time series based on the demand profiles
    of each zensus cell inside each MV grid district. Profiles are read from
    local hdf5-file or demand timeseries per nuts3 in db.
    Creates table `demand.egon_etrago_electricity_households` with
    Household electricity demand profiles aggregated at MV grid district
    level in MWh. Primarily used to create the eTraGo data model.

    Parameters
    ----------
    scenario_name: str
        Scenario name identifier, i.e. "eGon2035"
    scenario_year: int
        Scenario year according to `scenario_name`

    Returns
    -------
    pd.DataFrame
        Multiindexed dataframe with `timestep` and `bus_id` as indexers.
        Demand is given in kWh.
    """

    def tuple_format(x):
        """Convert Profile ids from string to tuple (type, id)

        Convert from (str)a000(int) to (str), (int)
        """
        return x[:2], int(x[3:])

    # Join the cell-to-profile mapping with the MV grid district (bus) of
    # each census cell
    with db.session_scope() as session:
        cells_query = session.query(
            HouseholdElectricityProfilesInCensusCells,
            MapZensusGridDistricts.bus_id,
        ).join(
            MapZensusGridDistricts,
            HouseholdElectricityProfilesInCensusCells.cell_id
            == MapZensusGridDistricts.zensus_population_id,
        )

    cells = pd.read_sql(
        cells_query.statement, cells_query.session.bind, index_col="cell_id"
    )

    # Demand source is configurable: standard load profiles ("slp") scaled
    # by cell population, or bottom-up IEE household profiles
    method = egon.data.config.settings()["egon-data"][
        "--household-electrical-demand-source"
    ]

    if method == "slp":
        # Import demand regio timeseries demand per nuts3 area
        dr_series = pd.read_sql_query(
            """
            SELECT year, nuts3, load_in_mwh FROM demand.demandregio_household_load_profiles
            """,
            con=engine,
        )
        dr_series = dr_series[dr_series["year"] == scenario_year]
        dr_series.drop(columns=["year"], inplace=True)
        dr_series.set_index("nuts3", inplace=True)
        # Squeeze to a Series mapping nuts3 -> load time series
        dr_series = dr_series.squeeze()

        # Population data per cell_id is used to scale the demand per nuts3
        population = pd.read_sql_query(
            """
            SELECT grid_id, population FROM society.destatis_zensus_population_per_ha
            """,
            con=engine,
        )
        population.set_index("grid_id", inplace=True)
        population = population.squeeze()
        # Population value -1 marks cells without data; treat as unpopulated
        population.loc[population == -1] = 0

        cells["population"] = cells["grid_id"].map(population)

        factor_column = f"""factor_{scenario_year}"""

        mvgd_profiles = pd.DataFrame(
            columns=["p_set", "q_set"], index=cells.bus_id.unique()
        )
        mvgd_profiles.index.name = "bus_id"

        # Each cell's share of its nuts3 population becomes its demand
        # scaling factor
        for nuts3, df in cells.groupby("nuts3"):
            cells.loc[df.index, factor_column] = (
                df["population"] / df["population"].sum()
            )

        # Sum the scaled nuts3 time series per MV grid district (bus).
        # 8760 = hourly time steps of one year.
        for bus, df_bus in cells.groupby("bus_id"):
            load_nuts = [0] * 8760
            for nuts3, df_nuts in df_bus.groupby("nuts3"):
                factor_nuts = df_nuts[factor_column].sum()
                total_load = [x * factor_nuts for x in dr_series[nuts3]]
                load_nuts = [sum(x) for x in zip(load_nuts, total_load)]
            mvgd_profiles.at[bus, "p_set"] = load_nuts

        mvgd_profiles.reset_index(inplace=True)

    elif method == "bottom-up-profiles":
        # Convert profile ids to tuple (type, id) format
        cells["cell_profile_ids"] = cells["cell_profile_ids"].apply(
            lambda x: list(map(tuple_format, x))
        )

        # Read demand profiles from egon-data-bundle
        df_iee_profiles = get_iee_hh_demand_profiles_raw()

        # Process profiles for further use
        df_iee_profiles = set_multiindex_to_profiles(df_iee_profiles)

        # Create aggregated load profile for each MV grid district
        mvgd_profiles_dict = {}
        for grid_district, data in cells.groupby("bus_id"):
            mvgd_profile = get_load_timeseries(
                df_iee_profiles=df_iee_profiles,
                df_hh_profiles_in_census_cells=data,
                cell_ids=data.index,
                year=scenario_year,
                peak_load_only=False,
            )
            mvgd_profiles_dict[grid_district] = [
                mvgd_profile.round(3).to_list()
            ]
        mvgd_profiles = pd.DataFrame.from_dict(
            mvgd_profiles_dict, orient="index"
        )

        # Reshape data: put MV grid ids in columns to a single index column
        mvgd_profiles = mvgd_profiles.reset_index()
        mvgd_profiles.columns = ["bus_id", "p_set"]

    # Add remaining columns
    mvgd_profiles["scn_name"] = scenario_name

    # Insert data into respective database table
    mvgd_profiles.to_sql(
        name=EgonEtragoElectricityHouseholds.__table__.name,
        schema=EgonEtragoElectricityHouseholds.__table__.schema,
        con=engine,
        if_exists="append",
        method="multi",
        chunksize=10000,
        index=False,
    )
1985
|
|
|
|
1986
|
|
|
|
1987
|
|
|
def get_scaled_profiles_from_db(
    attribute, list_of_identifiers, year, aggregate=True, peak_load_only=False
):
    """Retrieve selection of scaled household electricity demand profiles

    Parameters
    ----------
    attribute: str
        attribute to filter the table

        * nuts3
        * nuts1
        * cell_id

    list_of_identifiers: list of str/int
        nuts3/nuts1 need to be str
        cell_id need to be int

    year: int
        * 2035
        * 2050

    aggregate: bool
        If True, all profiles are summed. This uses a lot of RAM if a high
        attribute level is chosen

    peak_load_only: bool
        If True, only peak load value is returned

    Notes
    -----
    Aggregate == False option can use a lot of RAM if many profiles are
    selected

    Returns
    -------
    pd.Series or float
        Aggregated time series for given `cell_ids` or peak load of this
        time series in MWh.
    """
    # Look up which profiles belong to the selected cells/regions
    metadata = get_cell_demand_metadata_from_db(
        attribute=attribute, list_of_identifiers=list_of_identifiers
    )

    # Concatenate the per-cell profile-id lists and fetch those profiles
    profiles = get_hh_profiles_from_db(metadata.cell_profile_ids.sum())

    # Scale and (optionally) aggregate to the requested output
    return get_load_timeseries(
        df_iee_profiles=profiles,
        df_hh_profiles_in_census_cells=metadata,
        cell_ids=metadata.index.to_list(),
        year=year,
        aggregate=aggregate,
        peak_load_only=peak_load_only,
    )
2043
|
|
|
|