1
|
|
|
from urllib.request import urlretrieve |
2
|
|
|
import json |
3
|
|
|
import os |
4
|
|
|
import subprocess |
5
|
|
|
import time |
6
|
|
|
|
7
|
|
|
from egon.data import db |
8
|
|
|
import egon.data.config |
9
|
|
|
|
10
|
|
|
|
11
|
|
|
def download_osm_file(): |
12
|
|
|
"""Download OpenStreetMap `.pbf` file.""" |
13
|
|
|
data_config = egon.data.config.datasets() |
14
|
|
|
osm_config = data_config["openstreetmap"]["original_data"]["osm"] |
15
|
|
|
|
16
|
|
|
if not os.path.isfile(osm_config["file"]): |
17
|
|
|
urlretrieve(osm_config["url"] + osm_config["file"], osm_config["file"]) |
18
|
|
|
|
19
|
|
|
|
20
|
|
|
def osm2postgres(num_processes=4, cache_size=4096): |
21
|
|
|
"""Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database. |
22
|
|
|
|
23
|
|
|
Parameters |
24
|
|
|
---------- |
25
|
|
|
num_processes : int, optional |
26
|
|
|
Number of parallel processes used for processing during data import |
27
|
|
|
cache_size: int, optional |
28
|
|
|
Memory used during data import |
29
|
|
|
|
30
|
|
|
""" |
31
|
|
|
# Read database configuration from docker-compose.yml |
32
|
|
|
docker_db_config = db.credentials() |
33
|
|
|
|
34
|
|
|
# Get dataset config |
35
|
|
|
data_config = egon.data.config.datasets() |
36
|
|
|
osm_config = data_config["openstreetmap"]["original_data"]["osm"] |
37
|
|
|
|
38
|
|
|
# Prepare osm2pgsql command |
39
|
|
|
cmd = [ |
40
|
|
|
"osm2pgsql", |
41
|
|
|
"--create", |
42
|
|
|
"--slim", |
43
|
|
|
"--hstore-all", |
44
|
|
|
f"--number-processes {num_processes}", |
45
|
|
|
f"--cache {cache_size}", |
46
|
|
|
f"-H {docker_db_config['HOST']} -P {docker_db_config['PORT']} " |
47
|
|
|
f"-d {docker_db_config['POSTGRES_DB']} " |
48
|
|
|
f"-U {docker_db_config['POSTGRES_USER']}", |
49
|
|
|
f"-p {osm_config['table_prefix']}", |
50
|
|
|
f"-S {osm_config['stylefile']}", |
51
|
|
|
f"{osm_config['file']}", |
52
|
|
|
] |
53
|
|
|
|
54
|
|
|
# Execute osm2pgsql for import OSM data |
55
|
|
|
subprocess.run( |
56
|
|
|
" ".join(cmd), |
57
|
|
|
shell=True, |
58
|
|
|
env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]}, |
59
|
|
|
cwd=os.path.dirname(__file__), |
60
|
|
|
) |
61
|
|
|
|
62
|
|
|
|
63
|
|
|
def post_import_modifications(): |
64
|
|
|
"""Adjust primary keys, indices and schema of OSM tables. |
65
|
|
|
|
66
|
|
|
* The Column "gid" is added and used as the new primary key. |
67
|
|
|
* Indices (GIST, GIN) are reset |
68
|
|
|
* The tables are moved to the schema configured as the "output_schema". |
69
|
|
|
""" |
70
|
|
|
# Replace indices and primary keys |
71
|
|
|
for table in [ |
72
|
|
|
"osm_" + suffix for suffix in ["line", "point", "polygon", "roads"] |
73
|
|
|
]: |
74
|
|
|
|
75
|
|
|
# Drop indices |
76
|
|
|
sql_statements = [f"DROP INDEX {table}_index;"] |
77
|
|
|
|
78
|
|
|
# Drop primary keys |
79
|
|
|
sql_statements.append(f"DROP INDEX {table}_pkey;") |
80
|
|
|
|
81
|
|
|
# Add primary key on newly created column "gid" |
82
|
|
|
sql_statements.append(f"ALTER TABLE public.{table} ADD gid SERIAL;") |
83
|
|
|
sql_statements.append( |
84
|
|
|
f"ALTER TABLE public.{table} ADD PRIMARY KEY (gid);" |
85
|
|
|
) |
86
|
|
|
sql_statements.append( |
87
|
|
|
f"ALTER TABLE public.{table} RENAME COLUMN way TO geom;" |
88
|
|
|
) |
89
|
|
|
|
90
|
|
|
# Add indices (GIST and GIN) |
91
|
|
|
sql_statements.append( |
92
|
|
|
f"CREATE INDEX {table}_geom_idx ON public.{table} " |
93
|
|
|
f"USING gist (geom);" |
94
|
|
|
) |
95
|
|
|
sql_statements.append( |
96
|
|
|
f"CREATE INDEX {table}_tags_idx ON public.{table} " |
97
|
|
|
f"USING GIN (tags);" |
98
|
|
|
) |
99
|
|
|
|
100
|
|
|
# Execute collected SQL statements |
101
|
|
|
for statement in sql_statements: |
102
|
|
|
db.execute_sql(statement) |
103
|
|
|
|
104
|
|
|
# Get dataset config |
105
|
|
|
data_config = egon.data.config.datasets()["openstreetmap"][ |
106
|
|
|
"original_data" |
107
|
|
|
]["osm"] |
108
|
|
|
|
109
|
|
|
# Move table to schema "openstreetmap" |
110
|
|
|
db.execute_sql( |
111
|
|
|
f"CREATE SCHEMA IF NOT EXISTS {data_config['output_schema']};" |
112
|
|
|
) |
113
|
|
|
|
114
|
|
|
for out_table in data_config["output_tables"]: |
115
|
|
|
sql_statement = ( |
116
|
|
|
f"ALTER TABLE public.{out_table} " |
117
|
|
|
f"SET SCHEMA {data_config['output_schema']};" |
118
|
|
|
) |
119
|
|
|
|
120
|
|
|
db.execute_sql(sql_statement) |
121
|
|
|
|
122
|
|
|
|
123
|
|
|
def metadata(): |
124
|
|
|
"""Writes metadata JSON string into table comment.""" |
125
|
|
|
# Prepare variables |
126
|
|
|
osm_config = egon.data.config.datasets()["openstreetmap"]["original_data"][ |
127
|
|
|
"osm" |
128
|
|
|
] |
129
|
|
|
spatial_and_date = os.path.basename(osm_config["file"]).split("-") |
130
|
|
|
spatial_extend = spatial_and_date[0] |
131
|
|
|
osm_data_date = ( |
132
|
|
|
"20" |
133
|
|
|
+ spatial_and_date[1][0:2] |
134
|
|
|
+ "-" |
135
|
|
|
+ spatial_and_date[1][2:4] |
136
|
|
|
+ "-" |
137
|
|
|
+ spatial_and_date[1][4:6] |
138
|
|
|
) |
139
|
|
|
osm_url = osm_config["url"] |
140
|
|
|
|
141
|
|
|
# Insert metadata for each table |
142
|
|
|
licenses = [ |
143
|
|
|
{ |
144
|
|
|
"name": "Open Data Commons Open Database License 1.0", |
145
|
|
|
"title": "", |
146
|
|
|
"path": "https://opendatacommons.org/licenses/odbl/1.0/", |
147
|
|
|
"instruction": ( |
148
|
|
|
"You are free: To Share, To Create, To Adapt;" |
149
|
|
|
" As long as you: Attribute, Share-Alike, Keep open!" |
150
|
|
|
), |
151
|
|
|
"attribution": "© Reiner Lemoine Institut", |
152
|
|
|
} |
153
|
|
|
] |
154
|
|
|
for table in osm_config["output_tables"]: |
155
|
|
|
table_suffix = table.split("_")[1] |
156
|
|
|
meta = { |
157
|
|
|
"title": f"OpenStreetMap (OSM) - Germany - {table_suffix}", |
158
|
|
|
"description": ( |
159
|
|
|
"OpenStreetMap is a free, editable map of the" |
160
|
|
|
" whole world that is being built by volunteers" |
161
|
|
|
" largely from scratch and released with" |
162
|
|
|
" an open-content license." |
163
|
|
|
), |
164
|
|
|
"language": ["EN", "DE"], |
165
|
|
|
"spatial": { |
166
|
|
|
"location": "", |
167
|
|
|
"extent": f"{spatial_extend}", |
168
|
|
|
"resolution": "", |
169
|
|
|
}, |
170
|
|
|
"temporal": { |
171
|
|
|
"referenceDate": f"{osm_data_date}", |
172
|
|
|
"timeseries": { |
173
|
|
|
"start": "", |
174
|
|
|
"end": "", |
175
|
|
|
"resolution": "", |
176
|
|
|
"alignment": "", |
177
|
|
|
"aggregationType": "", |
178
|
|
|
}, |
179
|
|
|
}, |
180
|
|
|
"sources": [ |
181
|
|
|
{ |
182
|
|
|
"title": ( |
183
|
|
|
"Geofabrik - Download - OpenStreetMap Data Extracts" |
184
|
|
|
), |
185
|
|
|
"description": ( |
186
|
|
|
'Data dump taken on "referenceDate",' |
187
|
|
|
f" i.e. {osm_data_date}." |
188
|
|
|
" A subset of this is selected using osm2pgsql" |
189
|
|
|
' using the style file "oedb.style".' |
190
|
|
|
), |
191
|
|
|
"path": f"{osm_url}", |
192
|
|
|
"licenses": licenses, |
193
|
|
|
} |
194
|
|
|
], |
195
|
|
|
"licenses": licenses, |
196
|
|
|
"contributors": [ |
197
|
|
|
{ |
198
|
|
|
"title": "Guido Pleßmann", |
199
|
|
|
"email": "http://github.com/gplssm", |
200
|
|
|
"date": time.strftime("%Y-%m-%d"), |
201
|
|
|
"object": "", |
202
|
|
|
"comment": "Imported data", |
203
|
|
|
} |
204
|
|
|
], |
205
|
|
|
"metaMetadata": { |
206
|
|
|
"metadataVersion": "OEP-1.4.0", |
207
|
|
|
"metadataLicense": { |
208
|
|
|
"name": "CC0-1.0", |
209
|
|
|
"title": "Creative Commons Zero v1.0 Universal", |
210
|
|
|
"path": ( |
211
|
|
|
"https://creativecommons.org/publicdomain/zero/1.0/" |
212
|
|
|
), |
213
|
|
|
}, |
214
|
|
|
}, |
215
|
|
|
} |
216
|
|
|
|
217
|
|
|
meta_json = "'" + json.dumps(meta) + "'" |
218
|
|
|
|
219
|
|
|
db.submit_comment(meta_json, "openstreetmap", table) |
220
|
|
|
|