Passed
Push — dev ( 836244...247d11 )
by Stephan
01:23 queued 11s
created

data.datasets.osm   A

Complexity

Total Complexity 13

Size/Duplication

Total Lines 266
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 143
dl 0
loc 266
rs 10
c 0
b 0
f 0
wmc 13

1 Method

Rating   Name   Duplication   Size   Complexity  
A OpenStreetMap.__init__() 0 6 1

4 Functions

Rating   Name   Duplication   Size   Complexity  
A modify_tables() 0 62 4
B add_metadata() 0 101 3
A download() 0 16 3
A to_postgres() 0 47 2
1
"""The central module containing all code dealing with importing OSM data.
2
3
This module either directly contains the code dealing with importing OSM
4
data, or it re-exports everything needed to handle it. Please refrain
5
from importing code from any modules below this one, because it might
6
lead to unwanted behaviour.
7
8
If you have to import code from a module below this one because the code
9
isn't exported from this module, please file a bug, so we can fix this.
10
"""
11
12
from urllib.request import urlretrieve
13
import json
14
import os
15
import time
16
17
from egon.data import db
18
from egon.data.config import settings
19
from egon.data.datasets import Dataset
20
import egon.data.config
21
import egon.data.subprocess as subprocess
22
23
24
def download():
25
    """Download OpenStreetMap `.pbf` file."""
26
    data_config = egon.data.config.datasets()
27
    osm_config = data_config["openstreetmap"]["original_data"]
28
29
    if settings()["egon-data"]["--dataset-boundary"] == "Everything":
30
        source_url = osm_config["source"]["url"]
31
        target_path = osm_config["target"]["path"]
32
    else:
33
        source_url = osm_config["source"]["url_testmode"]
34
        target_path = osm_config["target"]["path_testmode"]
35
36
    target_file = os.path.join(os.path.dirname(__file__), target_path)
37
38
    if not os.path.isfile(target_file):
39
        urlretrieve(source_url, target_file)
40
41
42
def to_postgres(num_processes=4, cache_size=4096):
43
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database.
44
45
    Parameters
46
    ----------
47
    num_processes : int, optional
48
        Number of parallel processes used for processing during data import
49
    cache_size: int, optional
50
        Memory used during data import
51
52
    """
53
    # Read database configuration from docker-compose.yml
54
    docker_db_config = db.credentials()
55
56
    # Get dataset config
57
    data_config = egon.data.config.datasets()
58
    osm_config = data_config["openstreetmap"]["original_data"]
59
60
    if settings()["egon-data"]["--dataset-boundary"] == "Everything":
61
        target_path = osm_config["target"]["path"]
62
    else:
63
        target_path = osm_config["target"]["path_testmode"]
64
65
    input_file = os.path.join(os.path.dirname(__file__), target_path)
66
67
    # Prepare osm2pgsql command
68
    cmd = [
69
        "osm2pgsql",
70
        "--create",
71
        "--slim",
72
        "--hstore-all",
73
        f"--number-processes {num_processes}",
74
        f"--cache {cache_size}",
75
        f"-H {docker_db_config['HOST']} -P {docker_db_config['PORT']} "
76
        f"-d {docker_db_config['POSTGRES_DB']} "
77
        f"-U {docker_db_config['POSTGRES_USER']}",
78
        f"-p {osm_config['target']['table_prefix']}",
79
        f"-S {osm_config['source']['stylefile']}",
80
        f"{input_file}",
81
    ]
82
83
    # Execute osm2pgsql for import OSM data
84
    subprocess.run(
85
        " ".join(cmd),
86
        shell=True,
87
        env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]},
88
        cwd=os.path.dirname(__file__),
89
    )
90
91
92
def add_metadata():
93
    """Writes metadata JSON string into table comment."""
94
    # Prepare variables
95
    osm_config = egon.data.config.datasets()["openstreetmap"]
96
97
    if settings()["egon-data"]["--dataset-boundary"] == "Everything":
98
        osm_url = osm_config["original_data"]["source"]["url"]
99
        target_path = osm_config["original_data"]["target"]["path"]
100
    else:
101
        osm_url = osm_config["original_data"]["source"]["url_testmode"]
102
        target_path = osm_config["original_data"]["target"]["path_testmode"]
103
    spatial_and_date = os.path.basename(target_path).split("-")
104
    spatial_extend = spatial_and_date[0]
105
    osm_data_date = (
106
        "20"
107
        + spatial_and_date[1][0:2]
108
        + "-"
109
        + spatial_and_date[1][2:4]
110
        + "-"
111
        + spatial_and_date[1][4:6]
112
    )
113
114
    # Insert metadata for each table
115
    licenses = [
116
        {
117
            "name": "Open Data Commons Open Database License 1.0",
118
            "title": "",
119
            "path": "https://opendatacommons.org/licenses/odbl/1.0/",
120
            "instruction": (
121
                "You are free: To Share, To Create, To Adapt;"
122
                " As long as you: Attribute, Share-Alike, Keep open!"
123
            ),
124
            "attribution": "© Reiner Lemoine Institut",
125
        }
126
    ]
127
    for table in osm_config["processed"]["tables"]:
128
        table_suffix = table.split("_")[1]
129
        meta = {
130
            "title": f"OpenStreetMap (OSM) - Germany - {table_suffix}",
131
            "description": (
132
                "OpenStreetMap is a free, editable map of the"
133
                " whole world that is being built by volunteers"
134
                " largely from scratch and released with"
135
                " an open-content license."
136
            ),
137
            "language": ["EN", "DE"],
138
            "spatial": {
139
                "location": "",
140
                "extent": f"{spatial_extend}",
141
                "resolution": "",
142
            },
143
            "temporal": {
144
                "referenceDate": f"{osm_data_date}",
145
                "timeseries": {
146
                    "start": "",
147
                    "end": "",
148
                    "resolution": "",
149
                    "alignment": "",
150
                    "aggregationType": "",
151
                },
152
            },
153
            "sources": [
154
                {
155
                    "title": (
156
                        "Geofabrik - Download - OpenStreetMap Data Extracts"
157
                    ),
158
                    "description": (
159
                        'Data dump taken on "referenceDate",'
160
                        f" i.e. {osm_data_date}."
161
                        " A subset of this is selected using osm2pgsql"
162
                        ' using the style file "oedb.style".'
163
                    ),
164
                    "path": f"{osm_url}",
165
                    "licenses": licenses,
166
                }
167
            ],
168
            "licenses": licenses,
169
            "contributors": [
170
                {
171
                    "title": "Guido Pleßmann",
172
                    "email": "http://github.com/gplssm",
173
                    "date": time.strftime("%Y-%m-%d"),
174
                    "object": "",
175
                    "comment": "Imported data",
176
                }
177
            ],
178
            "metaMetadata": {
179
                "metadataVersion": "OEP-1.4.0",
180
                "metadataLicense": {
181
                    "name": "CC0-1.0",
182
                    "title": "Creative Commons Zero v1.0 Universal",
183
                    "path": (
184
                        "https://creativecommons.org/publicdomain/zero/1.0/"
185
                    ),
186
                },
187
            },
188
        }
189
190
        meta_json = "'" + json.dumps(meta) + "'"
191
192
        db.submit_comment(meta_json, "openstreetmap", table)
193
194
195
def modify_tables():
196
    """Adjust primary keys, indices and schema of OSM tables.
197
198
    * The Column "gid" is added and used as the new primary key.
199
    * Indices (GIST, GIN) are reset
200
    * The tables are moved to the schema configured as the "output_schema".
201
    """
202
    # Get dataset config
203
    data_config = egon.data.config.datasets()["openstreetmap"]
204
205
    # Replace indices and primary keys
206
    for table in [
207
        f"{data_config['original_data']['target']['table_prefix']}_" + suffix
208
        for suffix in ["line", "point", "polygon", "roads"]
209
    ]:
210
211
        # Drop indices
212
        sql_statements = [f"DROP INDEX IF EXISTS {table}_index;"]
213
214
        # Drop primary keys
215
        sql_statements.append(f"DROP INDEX IF EXISTS {table}_pkey;")
216
217
        # Add primary key on newly created column "gid"
218
        sql_statements.append(f"ALTER TABLE public.{table} ADD gid SERIAL;")
219
        sql_statements.append(
220
            f"ALTER TABLE public.{table} ADD PRIMARY KEY (gid);"
221
        )
222
        sql_statements.append(
223
            f"ALTER TABLE public.{table} RENAME COLUMN way TO geom;"
224
        )
225
226
        # Add indices (GIST and GIN)
227
        sql_statements.append(
228
            f"CREATE INDEX {table}_geom_idx ON public.{table} "
229
            f"USING gist (geom);"
230
        )
231
        sql_statements.append(
232
            f"CREATE INDEX {table}_tags_idx ON public.{table} "
233
            f"USING GIN (tags);"
234
        )
235
236
        # Execute collected SQL statements
237
        for statement in sql_statements:
238
            db.execute_sql(statement)
239
240
    # Move table to schema "openstreetmap"
241
    db.execute_sql(
242
        f"CREATE SCHEMA IF NOT EXISTS {data_config['processed']['schema']};"
243
    )
244
245
    for out_table in data_config["processed"]["tables"]:
246
        db.execute_sql(
247
            f"DROP TABLE IF EXISTS "
248
            f"{data_config['processed']['schema']}.{out_table};"
249
        )
250
251
        sql_statement = (
252
            f"ALTER TABLE public.{out_table} "
253
            f"SET SCHEMA {data_config['processed']['schema']};"
254
        )
255
256
        db.execute_sql(sql_statement)
257
258
259
class OpenStreetMap(Dataset):
260
    def __init__(self, dependencies):
261
        super().__init__(
262
            name="OpenStreetMap",
263
            version="0.0.0",
264
            dependencies=dependencies,
265
            tasks=(download, to_postgres, modify_tables, add_metadata),
266
        )
267