Passed
Pull Request — dev (#1375)
by
unknown
05:22
created

create_buildings_residential_zensus_mapping()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 0
1
"""
2
Filtering and preprocessing of buildings, streets and amenities from OpenStreetMap
3
4
"""
5
6
import os
7
8
from egon.data import db
9
from egon.data.datasets import Dataset
10
from egon_validation import (
11
    RowCountValidation,
12
    DataTypeValidation,
13
    WholeTableNotNullAndNotNaNValidation,
14
    SRIDUniqueNonZero
15
)
16
17
18
def execute_sql_script(script):
19
    """Execute SQL script
20
21
    Parameters
22
    ----------
23
    script : str
24
        Filename of script
25
    """
26
    db.execute_sql_script(os.path.join(os.path.dirname(__file__), script))
27
28
29
def preprocessing():
30
    print("Extracting buildings, amenities and shops...")
31
    sql_scripts = [
32
        "osm_amenities_shops_preprocessing.sql",
33
        "osm_buildings_extract.sql",
34
    ]
35
    for script in sql_scripts:
36
        execute_sql_script(script)
37
38
39
def filter_buildings():
40
    print("Filter buildings...")
41
    execute_sql_script("osm_buildings_filter.sql")
42
43
44
def filter_buildings_residential():
45
    print("Filter residential buildings...")
46
    execute_sql_script("osm_buildings_filter_residential.sql")
47
48
49
def extend_buildings_residential():
50
    print(
51
        "Extend residential buildings by commercial/retail/office/hotel "
52
        "buildings in cells with census population but without buildings..."
53
    )
54
    execute_sql_script("osm_buildings_extend_residential.sql")
55
56
57
def create_buildings_filtered_zensus_mapping():
58
    print(
59
        "Create census mapping table for filtered buildings in populated areas..."
60
    )
61
    execute_sql_script("osm_buildings_filtered_zensus_mapping.sql")
62
63
64
def create_buildings_filtered_all_zensus_mapping():
65
    print("Create census mapping table for all filtered buildings...")
66
    execute_sql_script("osm_buildings_filtered_all_zensus_mapping.sql")
67
68
69
def create_buildings_residential_zensus_mapping():
70
    print("Create census mapping table for residential buildings...")
71
    execute_sql_script("osm_buildings_residential_zensus_mapping.sql")
72
73
74
def create_buildings_temp_tables():
75
    print("Create temp tables for buildings...")
76
    execute_sql_script("osm_buildings_temp_tables.sql")
77
78
79
def extract_buildings_w_amenities():
80
    print("Extracting buildings with amenities...")
81
    execute_sql_script("osm_results_buildings_w_amenities.sql")
82
83
84
def extract_buildings_wo_amenities():
85
    print("Extracting buildings without amenities...")
86
    execute_sql_script("osm_results_buildings_wo_amenities.sql")
87
88
89
def extract_amenities():
90
    print("Extracting amenities...")
91
    execute_sql_script("osm_results_amenities.sql")
92
93
94
def extract_buildings_filtered_amenities():
95
    print("Extracting buildings filtered with and without amenities...")
96
    execute_sql_script("osm_buildings_filter_amenities.sql")
97
98
99
def extract_ways():
100
    print("Extracting ways...")
101
    execute_sql_script("osm_ways_preprocessing.sql")
102
103
104
def drop_temp_tables():
105
    print("Dropping temp tables...")
106
    execute_sql_script("drop_temp_tables.sql")
107
108
109
def add_metadata():
110
    pass
111
112
113
class OsmBuildingsStreets(Dataset):
114
    """
115
    Filter and preprocess buildings, streets and amenities from OpenStreetMap (OSM).
116
117
    This dataset on buildings and amenities is required by several tasks in the
118
    pipeline, such as the distribution of household demand profiles or PV home
119
    systems to buildings. This data is enriched by population and apartments from
120
    Zensus 2011. Those derived datasets and the data on streets will be used in the
121
    DIstribution Network Generat0r
122
    `ding0 <https://github.com/openego/ding0>`_ e.g. to cluster loads and create low
123
    voltage grids.
124
125
    *Dependencies*
126
      * :py:class:`OpenStreetMap <egon.data.datasets.osm.OpenStreetMap>`
127
      * :py:class:`ZensusMiscellaneous <egon.data.datasets.zensus.ZensusMiscellaneous>`
128
129
    *Resulting Tables*
130
      * openstreetmap.osm_buildings is created and filled (table has no associated python class)
131
      * openstreetmap.osm_buildings_filtered is created and filled (table has no associated python class)
132
      * openstreetmap.osm_buildings_residential is created and filled (table has no associated python class)
133
      * openstreetmap.osm_amenities_shops_filtered is created and filled (table has no associated python class)
134
      * openstreetmap.osm_buildings_with_amenities is created and filled (table has no associated python class)
135
      * openstreetmap.osm_buildings_without_amenities is created and filled (table has no associated python class)
136
      * openstreetmap.osm_amenities_not_in_buildings is created and filled (table has no associated python class)
137
      * openstreetmap.osm_ways_preprocessed is created and filled (table has no associated python class)
138
      * openstreetmap.osm_ways_with_segments is created and filled (table has no associated python class)
139
      * boundaries.egon_map_zensus_buildings_filtered is created and filled (table has no associated python class)
140
      * boundaries.egon_map_zensus_buildings_residential is created and filled (table has no associated python class)
141
      * openstreetmap.osm_buildings is created and filled (table has no associated python class)
142
143
    **Details and Steps**
144
145
    * Extract buildings and filter using relevant tags, e.g. residential and
146
      commercial, see script `osm_buildings_filter.sql` for the full list of tags.
147
      Resulting tables:
148
      * All buildings: `openstreetmap.osm_buildings`
149
      * Filtered buildings: `openstreetmap.osm_buildings_filtered`
150
      * Residential buildings: `openstreetmap.osm_buildings_residential`
151
        * 1st step: Filter by tags (see `osm_buildings_filter_residential.sql`)
152
        * 2nd step: Table is extended by finding census cells with population
153
          but no residential buildings and extended by commercial/retail/office/
154
          hotel buildings (see `osm_buildings_extend_residential.sql`) since they
155
          often include apartments as well.
156
    * Extract amenities and filter using relevant tags, e.g. shops and restaurants,
157
      see script `osm_amenities_shops_preprocessing.sql` for the full list of tags.
158
      Resulting table: `openstreetmap.osm_amenities_shops_filtered`
159
    * Create a mapping table for building's osm IDs to the Zensus cells the
160
      building's centroid is located in.
161
      Resulting tables:
162
      * `boundaries.egon_map_zensus_buildings_filtered` (filtered)
163
      * `boundaries.egon_map_zensus_buildings_residential` (residential only)
164
    * Enrich each building by number of apartments from Zensus table
165
      `society.egon_destatis_zensus_apartment_building_population_per_ha`
166
      by splitting up the cell's sum equally to the buildings. In some cases, a
167
      Zensus cell does not contain buildings but there's a building nearby which
168
      the no. of apartments is to be allocated to. To make sure apartments are
169
      allocated to at least one building, a radius of 77m is used to catch building
170
      geometries.
171
    * Split filtered buildings into 3 datasets using the amenities' locations:
172
      temporary tables are created in script `osm_buildings_temp_tables.sql` the
173
      final tables in `osm_buildings_amentities_results.sql`.
174
      Resulting tables:
175
176
      * Buildings w/ amenities: `openstreetmap.osm_buildings_with_amenities`
177
      * Buildings w/o amenities: `openstreetmap.osm_buildings_without_amenities`
178
      * Amenities not allocated to buildings:
179
        `openstreetmap.osm_amenities_not_in_buildings`
180
    * Extract streets (OSM ways) and filter using relevant tags, e.g.
181
      highway=secondary, see script `osm_ways_preprocessing.sql` for the full list
182
      of tags. Additionally, each way is split into its line segments and their
183
      lengths is retained.
184
      Resulting tables:
185
      * Filtered streets: `openstreetmap.osm_ways_preprocessed`
186
      * Filtered streets w/ segments: `openstreetmap.osm_ways_with_segments`
187
188
    """
189
190
    #:
191
    name: str = "OsmBuildingsStreets"
192
    #:
193
    version: str = "0.0.7"
194
195
    def __init__(self, dependencies):
196
        super().__init__(
197
            name=self.name,
198
            version=self.version,
199
            dependencies=dependencies,
200
            tasks=(
201
                preprocessing,
202
                {filter_buildings, filter_buildings_residential},
203
                extend_buildings_residential,
204
                extract_buildings_filtered_amenities,
205
                {
206
                    create_buildings_filtered_zensus_mapping,
207
                    create_buildings_residential_zensus_mapping,
208
                    create_buildings_filtered_all_zensus_mapping,
209
                },
210
                create_buildings_temp_tables,
211
                {
212
                    extract_buildings_w_amenities,
213
                    extract_buildings_wo_amenities,
214
                    extract_amenities,
215
                    extract_ways,
216
                },
217
                drop_temp_tables,
218
                add_metadata,
219
            ),
220
            validation={
221
                "data_quality": [
222
                    RowCountValidation(
223
                        table="boundaries.egon_map_zensus_buildings_filtered",
224
                        rule_id="TEST_ROW_COUNT.egon_map_zensus_buildings_filtered",
225
                        expected_count={"Schleswig-Holstein":1010387,
226
                                        "Everything":28070301}
227
                    ),
228
                    DataTypeValidation(
229
                        table="boundaries.egon_map_zensus_buildings_filtered",
230
                        rule_id="DATA_MULTIPLE_TYPES.egon_map_zensus_buildings_filtered",
231
                        column_types={"id": "integer", "grid_id": "character varying", "cell_id": "integer"}
232
                    ),
233
                    WholeTableNotNullAndNotNaNValidation(
234
                        table="boundaries.egon_map_zensus_buildings_filtered",
235
                        rule_id="WHOLE_TABLE_NOT_NAN.egon_map_zensus_buildings_filtered"
236
                    ),
237
                    RowCountValidation(
238
                        table="boundaries.egon_map_zensus_buildings_residential",
239
                        rule_id="ROW_COUNT.egon_map_zensus_buildings_residential",
240
                        expected_count={"Schleswig-Holstein":989967,
241
                                        "Everything":27477467}
242
                    ),
243
                    DataTypeValidation(
244
                        table="boundaries.egon_map_zensus_buildings_residential",
245
                        rule_id="DATA_MULTIPLE_TYPES.egon_map_zensus_buildings_residential",
246
                        column_types={"id": "integer", "grid_id": "character varying", "cell_id": "integer"}
247
                    ),
248
                    WholeTableNotNullAndNotNaNValidation(
249
                        table="boundaries.egon_map_zensus_buildings_residential",
250
                        rule_id="WHOLE_TABLE_NOT_NAN.egon_map_zensus_buildings_residential"
251
                    ),
252
                    RowCountValidation(
253
                        table="openstreetmap.osm_amenities_not_in_buildings",
254
                        rule_id="ROW_COUNT.osm_amenities_not_in_buildings",
255
                        expected_count={"Schleswig-Holstein": 3142,
256
                                        "Everything": 79928}
257
                    ),
258
                    DataTypeValidation(
259
                        table="openstreetmap.osm_amenities_not_in_buildings",
260
                        rule_id="DATA_MULTIPLE_TYPES.osm_amenities_not_in_buildings",
261
                        column_types={
262
                            "osm_id": "bigint", "amenity": "text", "name": "text", "geom_amenity": "geometry",
263
                            "tags": "hstore", "egon_amenity_id": "integer" }
264
                    ),
265
                    WholeTableNotNullAndNotNaNValidation(
266
                        table="openstreetmap.osm_amenities_not_in_buildings",
267
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_amenities_not_in_buildings"
268
                    ),
269
                    SRIDUniqueNonZero(
270
                        table="openstreetmap.osm_amenities_not_in_buildings",
271
                        rule_id="SRIDUniqueNonZero.osm_amenities_not_in_buildings.geom_amenity",
272
                        column="geom_amenity"
273
                    ),
274
                    RowCountValidation(
275
                        table="openstreetmap.osm_amenities_shops_filtered",
276
                        rule_id="ROW_COUNT.osm_amenities_shops_filtered",
277
                        expected_count={"Schleswig-Holstein": 27438, "Everything": 700315}
278
                    ),
279
                    DataTypeValidation(
280
                        table="openstreetmap.osm_amenities_shops_filtered",
281
                        rule_id="DATA_MULTIPLE_TYPES.osm_amenities_shops_filtered",
282
                        column_types={
283
                            "osm_id": "bigint", "amenity": "text", "name": "text", "geom_amenity": "geometry",
284
                            "tags": "hstore", "egon_amenity_id": "integer"}
285
                    ),
286
                    WholeTableNotNullAndNotNaNValidation(
287
                        table="openstreetmap.osm_amenities_shops_filtered",
288
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_amenities_shops_filtered"
289
                    ),
290
                    SRIDUniqueNonZero(
291
                        table="openstreetmap.osm_amenities_shops_filtered",
292
                        rule_id="SRIDUniqueNonZero.osm_amenities_shops_filtered.geom_amenity",
293
                        column="geom_amenity"
294
                    ),
295
                    RowCountValidation(
296
                        table="openstreetmap.osm_buildings",
297
                        rule_id="ROW_COUNT.osm_buildings",
298
                        expected_count={"Schleswig-Holstein": 1298230, "Everything": 34328483}
299
                    ),
300
                    DataTypeValidation(
301
                        table="openstreetmap.osm_buildings",
302
                        rule_id="DATA_MULTIPLE_TYPES.osm_buildings",
303
                        column_types={
304
                            "osm_id": "bigint", "amenity": "text", "building": "text", "name": "text",
305
                            "geom_building": "geometry", "area": "double precision", "geom_point": "geometry",
306
                            "tags": "hstore", "id": "integer"}
307
                    ),
308
                    WholeTableNotNullAndNotNaNValidation(
309
                        table="openstreetmap.osm_buildings",
310
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_buildings"
311
                    ),
312
                    SRIDUniqueNonZero(
313
                        table="openstreetmap.osm_buildings",
314
                        rule_id="SRIDUniqueNonZero.osm_buildings.geom_building",
315
                        column="geom_building"
316
                    ),
317
                    SRIDUniqueNonZero(
318
                        table="openstreetmap.osm_buildings",
319
                        rule_id="SRIDUniqueNonZero.osm_buildings.geom_point",
320
                        column="geom_point"
321
                    ),
322
                    RowCountValidation(
323
                        table="openstreetmap.osm_buildings_filtered",
324
                        rule_id="ROW_COUNT.osm_buildings_filtered",
325
                        expected_count={"Schleswig-Holstein": 1169881, "Everything": 31619905}
326
                    ),
327
                    DataTypeValidation(
328
                        table="openstreetmap.osm_buildings_filtered",
329
                        rule_id="DATA_MULTIPLE_TYPES.osm_buildings_filtered",
330
                        column_types={
331
                            "osm_id": "bigint", "amenity": "text", "building": "text", "name": "text",
332
                            "geom_building": "geometry", "area": "double precision", "geom_point": "geometry",
333
                            "tags": "hstore", "id": "integer"}
334
                    ),
335
                    WholeTableNotNullAndNotNaNValidation(
336
                        table="openstreetmap.osm_buildings_filtered",
337
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_buildings_filtered"
338
                    ),
339
                    SRIDUniqueNonZero(
340
                        table="openstreetmap.osm_buildings_filtered",
341
                        rule_id="SRIDUniqueNonZero.osm_buildings_filtered.geom_building",
342
                        column="geom_building"
343
                    ),
344
                    SRIDUniqueNonZero(
345
                        table="openstreetmap.osm_buildings_filtered",
346
                        rule_id="SRIDUniqueNonZero.osm_buildings_filtered.geom_point",
347
                        column="geom_point"
348
                    ),
349
                    RowCountValidation(
350
                        table="openstreetmap.osm_buildings_residential",
351
                        rule_id="ROW_COUNT.osm_buildings_residential",
352
                        expected_count={"Schleswig-Holstein": 1130929, "Everything": 30713011}
353
                    ),
354
                    DataTypeValidation(
355
                        table="openstreetmap.osm_buildings_residential",
356
                        rule_id="DATA_MULTIPLE_TYPES.osm_buildings_residential",
357
                        column_types={
358
                            "osm_id": "bigint", "amenity": "text", "building": "text", "name": "text",
359
                            "geom_building": "geometry", "area": "double precision", "geom_point": "geometry",
360
                            "tags": "hstore", "id": "integer"}
361
                    ),
362
                    WholeTableNotNullAndNotNaNValidation(
363
                        table="openstreetmap.osm_buildings_residential",
364
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_buildings_residential"
365
                    ),
366
                    SRIDUniqueNonZero(
367
                        table="openstreetmap.osm_buildings_residential",
368
                        rule_id="SRIDUniqueNonZero.osm_buildings_residential.geom_building",
369
                        column="geom_building"
370
                    ),
371
                    SRIDUniqueNonZero(
372
                        table="openstreetmap.osm_buildings_residential",
373
                        rule_id="SRIDUniqueNonZero.osm_buildings_residential.geom_point",
374
                        column="geom_point"
375
                    ),
376
                    RowCountValidation(
377
                        table="openstreetmap.osm_buildings_synthetic",
378
                        rule_id="ROW_COUNT.osm_buildings_synthetic",
379
                        expected_count={"Schleswig-Holstein": 9498, "Everything": 706911}
380
                    ),
381
                    DataTypeValidation(
382
                        table="openstreetmap.osm_buildings_synthetic",
383
                        rule_id="DATA_MULTIPLE_TYPES.osm_buildings_synthetic",
384
                        column_types={
385
                            "id": "character varying", "cell_id": "character varying", "geom_building": "geometry",
386
                            "geom_point": "geometry", "n_amenities_inside": "integer", "building": "character varying",
387
                            "area": "real"}
388
                    ),
389
                    WholeTableNotNullAndNotNaNValidation(
390
                        table="openstreetmap.osm_buildings_synthetic",
391
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_buildings_synthetic"
392
                    ),
393
                    SRIDUniqueNonZero(
394
                        table="openstreetmap.osm_buildings_synthetic",
395
                        rule_id="SRIDUniqueNonZero.osm_buildings_synthetic.geom_building",
396
                        column="geom_building"
397
                    ),
398
                    SRIDUniqueNonZero(
399
                        table="openstreetmap.osm_buildings_synthetic",
400
                        rule_id="SRIDUniqueNonZero.osm_buildings_synthetic.geom_point",
401
                        column="geom_point"
402
                    ),
403
                    RowCountValidation(
404
                        table="openstreetmap.osm_buildings_with_amenities",
405
                        rule_id="ROW_COUNT.osm_buildings_with_amenities",
406
                        expected_count={"Schleswig-Holstein": 24314, "Everything": 621385}
407
                    ),
408
                    DataTypeValidation(
409
                        table="openstreetmap.osm_buildings_with_amenities",
410
                        rule_id="DATA_MULTIPLE_TYPES.osm_buildings_with_amenities",
411
                        column_types={
412
                            "osm_id_amenity": "bigint",
413
                            "osm_id_building": "bigint",
414
                            "id": "integer",
415
                            "building": "text",
416
                            "area": "double precision",
417
                            "geom_building": "geometry",
418
                            "geom_amenity": "geometry",
419
                            "geom_point": "geometry",
420
                            "name": "text",
421
                            "tags_building": "hstore",
422
                            "tags_amenity": "hstore",
423
                            "n_amenities_inside": "bigint",
424
                            "apartment_count": "numeric"}
425
                    ),
426
                    WholeTableNotNullAndNotNaNValidation(
427
                        table="openstreetmap.osm_buildings_with_amenities",
428
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_buildings_with_amenities"
429
                    ),
430
                    SRIDUniqueNonZero(
431
                        table="openstreetmap.osm_buildings_with_amenities",
432
                        rule_id="SRIDUniqueNonZero.osm_buildings_with_amenities.geom_building",
433
                        column="geom_building"
434
                    ),
435
                    SRIDUniqueNonZero(
436
                        table="openstreetmap.osm_buildings_with_amenities",
437
                        rule_id="SRIDUniqueNonZero.osm_buildings_with_amenities.geom_amenity",
438
                        column="geom_amenity"
439
                    ),
440
                    SRIDUniqueNonZero(
441
                        table="openstreetmap.osm_buildings_with_amenities",
442
                        rule_id="SRIDUniqueNonZero.osm_buildings_with_amenities.geom_point",
443
                        column="geom_point"
444
                    ),
445
                    RowCountValidation(
446
                        table="openstreetmap.osm_buildings_without_amenities",
447
                        rule_id="ROW_COUNT.osm_buildings_without_amenities",
448
                        expected_count={"Schleswig-Holstein": 1152146, "Everything": 31151277}
449
                    ),
450
                    DataTypeValidation(
451
                        table="openstreetmap.osm_buildings_without_amenities",
452
                        rule_id="DATA_MULTIPLE_TYPES.osm_buildings_without_amenities",
453
                        column_types={
454
                            "osm_id": "bigint",
455
                            "id": "integer",
456
                            "building": "text",
457
                            "area": "double precision",
458
                            "geom_building": "geometry",
459
                            "geom_point": "geometry",
460
                            "name": "text",
461
                            "tags": "hstore",
462
                            "apartment_count": "numeric"}
463
                    ),
464
                    WholeTableNotNullAndNotNaNValidation(
465
                        table="openstreetmap.osm_buildings_without_amenities",
466
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_buildings_without_amenities"
467
                    ),
468
                    SRIDUniqueNonZero(
469
                        table="openstreetmap.osm_buildings_without_amenities",
470
                        rule_id="SRIDUniqueNonZero.osm_buildings_without_amenities.geom_building",
471
                        column="geom_building"
472
                    ),
473
                    SRIDUniqueNonZero(
474
                        table="openstreetmap.osm_buildings_without_amenities",
475
                        rule_id="SRIDUniqueNonZero.osm_buildings_without_amenities.geom_point",
476
                        column="geom_point"
477
                    ),
478
                    RowCountValidation(
479
                        table="openstreetmap.osm_ways_with_segments",
480
                        rule_id="ROW_COUNT.osm_ways_with_segments",
481
                        expected_count={"Schleswig-Holstein": 263427, "Everything": 6716196}
482
                    ),
483
                    DataTypeValidation(
484
                        table="openstreetmap.osm_ways_with_segments",
485
                        rule_id="DATA_MULTIPLE_TYPES.osm_ways_with_segments",
486
                        column_types={
487
                            "osm_id": "bigint",
488
                            "nodes": "bigint[]",
489
                            "highway": "text",
490
                            "geom": "geometry",
491
                            "length_segments": "double precision[]"}
492
                    ),
493
                    WholeTableNotNullAndNotNaNValidation(
494
                        table="openstreetmap.osm_ways_with_segments",
495
                        rule_id="WHOLE_TABLE_NOT_NAN.osm_ways_with_segments"
496
                    ),
497
                    SRIDUniqueNonZero(
498
                        table="openstreetmap.osm_ways_with_segments",
499
                        rule_id="SRIDUniqueNonZero.osm_ways_with_segments.geom",
500
                        column="geom"
501
                    ),
502
                ]
503
            },
504
            on_validation_failure="continue"
505
        )
506