1
|
|
|
"""The central module containing all code dealing with importing OSM data. |
2
|
|
|
|
3
|
|
|
This module either directly contains the code dealing with importing OSM |
4
|
|
|
data, or it re-exports everything needed to handle it. Please refrain |
5
|
|
|
from importing code from any modules below this one, because it might |
6
|
|
|
lead to unwanted behaviour. |
7
|
|
|
|
8
|
|
|
If you have to import code from a module below this one because the code |
9
|
|
|
isn't exported from this module, please file a bug, so we can fix this. |
10
|
|
|
""" |
11
|
|
|
|
12
|
|
|
from urllib.request import urlretrieve |
13
|
|
|
import json |
14
|
|
|
import os |
15
|
|
|
import time |
16
|
|
|
|
17
|
|
|
from egon.data import db |
18
|
|
|
from egon.data.config import settings |
19
|
|
|
from egon.data.datasets import Dataset |
20
|
|
|
import egon.data.config |
21
|
|
|
import egon.data.subprocess as subprocess |
22
|
|
|
|
23
|
|
|
|
24
|
|
|
def download(): |
25
|
|
|
"""Download OpenStreetMap `.pbf` file.""" |
26
|
|
|
data_config = egon.data.config.datasets() |
27
|
|
|
osm_config = data_config["openstreetmap"]["original_data"] |
28
|
|
|
|
29
|
|
|
if settings()["egon-data"]["--dataset-boundary"] == "Everything": |
30
|
|
|
source_url = osm_config["source"]["url"] |
31
|
|
|
target_path = osm_config["target"]["path"] |
32
|
|
|
else: |
33
|
|
|
source_url = osm_config["source"]["url_testmode"] |
34
|
|
|
target_path = osm_config["target"]["path_testmode"] |
35
|
|
|
|
36
|
|
|
target_file = os.path.join(os.path.dirname(__file__), target_path) |
37
|
|
|
|
38
|
|
|
if not os.path.isfile(target_file): |
39
|
|
|
urlretrieve(source_url, target_file) |
40
|
|
|
|
41
|
|
|
|
42
|
|
|
def to_postgres(num_processes=4, cache_size=4096): |
43
|
|
|
"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database. |
44
|
|
|
|
45
|
|
|
Parameters |
46
|
|
|
---------- |
47
|
|
|
num_processes : int, optional |
48
|
|
|
Number of parallel processes used for processing during data import |
49
|
|
|
cache_size: int, optional |
50
|
|
|
Memory used during data import |
51
|
|
|
|
52
|
|
|
""" |
53
|
|
|
# Read database configuration from docker-compose.yml |
54
|
|
|
docker_db_config = db.credentials() |
55
|
|
|
|
56
|
|
|
# Get dataset config |
57
|
|
|
data_config = egon.data.config.datasets() |
58
|
|
|
osm_config = data_config["openstreetmap"]["original_data"] |
59
|
|
|
|
60
|
|
|
if settings()["egon-data"]["--dataset-boundary"] == "Everything": |
61
|
|
|
target_path = osm_config["target"]["path"] |
62
|
|
|
else: |
63
|
|
|
target_path = osm_config["target"]["path_testmode"] |
64
|
|
|
|
65
|
|
|
input_file = os.path.join(os.path.dirname(__file__), target_path) |
66
|
|
|
|
67
|
|
|
# Prepare osm2pgsql command |
68
|
|
|
cmd = [ |
69
|
|
|
"osm2pgsql", |
70
|
|
|
"--create", |
71
|
|
|
"--slim", |
72
|
|
|
"--hstore-all", |
73
|
|
|
f"--number-processes {num_processes}", |
74
|
|
|
f"--cache {cache_size}", |
75
|
|
|
f"-H {docker_db_config['HOST']} -P {docker_db_config['PORT']} " |
76
|
|
|
f"-d {docker_db_config['POSTGRES_DB']} " |
77
|
|
|
f"-U {docker_db_config['POSTGRES_USER']}", |
78
|
|
|
f"-p {osm_config['target']['table_prefix']}", |
79
|
|
|
f"-S {osm_config['source']['stylefile']}", |
80
|
|
|
f"{input_file}", |
81
|
|
|
] |
82
|
|
|
|
83
|
|
|
# Execute osm2pgsql for import OSM data |
84
|
|
|
subprocess.run( |
85
|
|
|
" ".join(cmd), |
86
|
|
|
shell=True, |
87
|
|
|
env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]}, |
88
|
|
|
cwd=os.path.dirname(__file__), |
89
|
|
|
) |
90
|
|
|
|
91
|
|
|
|
92
|
|
|
def add_metadata(): |
93
|
|
|
"""Writes metadata JSON string into table comment.""" |
94
|
|
|
# Prepare variables |
95
|
|
|
osm_config = egon.data.config.datasets()["openstreetmap"] |
96
|
|
|
|
97
|
|
|
if settings()["egon-data"]["--dataset-boundary"] == "Everything": |
98
|
|
|
osm_url = osm_config["original_data"]["source"]["url"] |
99
|
|
|
target_path = osm_config["original_data"]["target"]["path"] |
100
|
|
|
else: |
101
|
|
|
osm_url = osm_config["original_data"]["source"]["url_testmode"] |
102
|
|
|
target_path = osm_config["original_data"]["target"]["path_testmode"] |
103
|
|
|
spatial_and_date = os.path.basename(target_path).split("-") |
104
|
|
|
spatial_extend = spatial_and_date[0] |
105
|
|
|
osm_data_date = ( |
106
|
|
|
"20" |
107
|
|
|
+ spatial_and_date[1][0:2] |
108
|
|
|
+ "-" |
109
|
|
|
+ spatial_and_date[1][2:4] |
110
|
|
|
+ "-" |
111
|
|
|
+ spatial_and_date[1][4:6] |
112
|
|
|
) |
113
|
|
|
|
114
|
|
|
# Insert metadata for each table |
115
|
|
|
licenses = [ |
116
|
|
|
{ |
117
|
|
|
"name": "Open Data Commons Open Database License 1.0", |
118
|
|
|
"title": "", |
119
|
|
|
"path": "https://opendatacommons.org/licenses/odbl/1.0/", |
120
|
|
|
"instruction": ( |
121
|
|
|
"You are free: To Share, To Create, To Adapt;" |
122
|
|
|
" As long as you: Attribute, Share-Alike, Keep open!" |
123
|
|
|
), |
124
|
|
|
"attribution": "© Reiner Lemoine Institut", |
125
|
|
|
} |
126
|
|
|
] |
127
|
|
|
for table in osm_config["processed"]["tables"]: |
128
|
|
|
table_suffix = table.split("_")[1] |
129
|
|
|
meta = { |
130
|
|
|
"title": f"OpenStreetMap (OSM) - Germany - {table_suffix}", |
131
|
|
|
"description": ( |
132
|
|
|
"OpenStreetMap is a free, editable map of the" |
133
|
|
|
" whole world that is being built by volunteers" |
134
|
|
|
" largely from scratch and released with" |
135
|
|
|
" an open-content license." |
136
|
|
|
), |
137
|
|
|
"language": ["EN", "DE"], |
138
|
|
|
"spatial": { |
139
|
|
|
"location": "", |
140
|
|
|
"extent": f"{spatial_extend}", |
141
|
|
|
"resolution": "", |
142
|
|
|
}, |
143
|
|
|
"temporal": { |
144
|
|
|
"referenceDate": f"{osm_data_date}", |
145
|
|
|
"timeseries": { |
146
|
|
|
"start": "", |
147
|
|
|
"end": "", |
148
|
|
|
"resolution": "", |
149
|
|
|
"alignment": "", |
150
|
|
|
"aggregationType": "", |
151
|
|
|
}, |
152
|
|
|
}, |
153
|
|
|
"sources": [ |
154
|
|
|
{ |
155
|
|
|
"title": ( |
156
|
|
|
"Geofabrik - Download - OpenStreetMap Data Extracts" |
157
|
|
|
), |
158
|
|
|
"description": ( |
159
|
|
|
'Data dump taken on "referenceDate",' |
160
|
|
|
f" i.e. {osm_data_date}." |
161
|
|
|
" A subset of this is selected using osm2pgsql" |
162
|
|
|
' using the style file "oedb.style".' |
163
|
|
|
), |
164
|
|
|
"path": f"{osm_url}", |
165
|
|
|
"licenses": licenses, |
166
|
|
|
} |
167
|
|
|
], |
168
|
|
|
"licenses": licenses, |
169
|
|
|
"contributors": [ |
170
|
|
|
{ |
171
|
|
|
"title": "Guido Pleßmann", |
172
|
|
|
"email": "http://github.com/gplssm", |
173
|
|
|
"date": time.strftime("%Y-%m-%d"), |
174
|
|
|
"object": "", |
175
|
|
|
"comment": "Imported data", |
176
|
|
|
} |
177
|
|
|
], |
178
|
|
|
"metaMetadata": { |
179
|
|
|
"metadataVersion": "OEP-1.4.0", |
180
|
|
|
"metadataLicense": { |
181
|
|
|
"name": "CC0-1.0", |
182
|
|
|
"title": "Creative Commons Zero v1.0 Universal", |
183
|
|
|
"path": ( |
184
|
|
|
"https://creativecommons.org/publicdomain/zero/1.0/" |
185
|
|
|
), |
186
|
|
|
}, |
187
|
|
|
}, |
188
|
|
|
} |
189
|
|
|
|
190
|
|
|
meta_json = "'" + json.dumps(meta) + "'" |
191
|
|
|
|
192
|
|
|
db.submit_comment(meta_json, "openstreetmap", table) |
193
|
|
|
|
194
|
|
|
|
195
|
|
|
def modify_tables(): |
196
|
|
|
"""Adjust primary keys, indices and schema of OSM tables. |
197
|
|
|
|
198
|
|
|
* The Column "gid" is added and used as the new primary key. |
199
|
|
|
* Indices (GIST, GIN) are reset |
200
|
|
|
* The tables are moved to the schema configured as the "output_schema". |
201
|
|
|
""" |
202
|
|
|
# Get dataset config |
203
|
|
|
data_config = egon.data.config.datasets()["openstreetmap"] |
204
|
|
|
|
205
|
|
|
# Replace indices and primary keys |
206
|
|
|
for table in [ |
207
|
|
|
f"{data_config['original_data']['target']['table_prefix']}_" + suffix |
208
|
|
|
for suffix in ["line", "point", "polygon", "roads"] |
209
|
|
|
]: |
210
|
|
|
|
211
|
|
|
# Drop indices |
212
|
|
|
sql_statements = [f"DROP INDEX IF EXISTS {table}_index;"] |
213
|
|
|
|
214
|
|
|
# Drop primary keys |
215
|
|
|
sql_statements.append(f"DROP INDEX IF EXISTS {table}_pkey;") |
216
|
|
|
|
217
|
|
|
# Add primary key on newly created column "gid" |
218
|
|
|
sql_statements.append(f"ALTER TABLE public.{table} ADD gid SERIAL;") |
219
|
|
|
sql_statements.append( |
220
|
|
|
f"ALTER TABLE public.{table} ADD PRIMARY KEY (gid);" |
221
|
|
|
) |
222
|
|
|
sql_statements.append( |
223
|
|
|
f"ALTER TABLE public.{table} RENAME COLUMN way TO geom;" |
224
|
|
|
) |
225
|
|
|
|
226
|
|
|
# Add indices (GIST and GIN) |
227
|
|
|
sql_statements.append( |
228
|
|
|
f"CREATE INDEX {table}_geom_idx ON public.{table} " |
229
|
|
|
f"USING gist (geom);" |
230
|
|
|
) |
231
|
|
|
sql_statements.append( |
232
|
|
|
f"CREATE INDEX {table}_tags_idx ON public.{table} " |
233
|
|
|
f"USING GIN (tags);" |
234
|
|
|
) |
235
|
|
|
|
236
|
|
|
# Execute collected SQL statements |
237
|
|
|
for statement in sql_statements: |
238
|
|
|
db.execute_sql(statement) |
239
|
|
|
|
240
|
|
|
# Move table to schema "openstreetmap" |
241
|
|
|
db.execute_sql( |
242
|
|
|
f"CREATE SCHEMA IF NOT EXISTS {data_config['processed']['schema']};" |
243
|
|
|
) |
244
|
|
|
|
245
|
|
|
for out_table in data_config["processed"]["tables"]: |
246
|
|
|
db.execute_sql( |
247
|
|
|
f"DROP TABLE IF EXISTS " |
248
|
|
|
f"{data_config['processed']['schema']}.{out_table};" |
249
|
|
|
) |
250
|
|
|
|
251
|
|
|
sql_statement = ( |
252
|
|
|
f"ALTER TABLE public.{out_table} " |
253
|
|
|
f"SET SCHEMA {data_config['processed']['schema']};" |
254
|
|
|
) |
255
|
|
|
|
256
|
|
|
db.execute_sql(sql_statement) |
257
|
|
|
|
258
|
|
|
|
259
|
|
|
class OpenStreetMap(Dataset): |
260
|
|
|
def __init__(self, dependencies): |
261
|
|
|
super().__init__( |
262
|
|
|
name="OpenStreetMap", |
263
|
|
|
version="0.0.0", |
264
|
|
|
dependencies=dependencies, |
265
|
|
|
tasks=(download, to_postgres, modify_tables, add_metadata), |
266
|
|
|
) |
267
|
|
|
|