Passed
Pull Request — dev (#31)
by
unknown
01:27
created

import_osm.osm2postgres()   A

Complexity

Conditions 1

Size

Total Lines 40
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 20
dl 0
loc 40
rs 9.4
c 0
b 0
f 0
cc 1
nop 2
1
from urllib.request import urlretrieve
2
import json
3
import os
4
import subprocess
5
import time
6
7
from egon.data import db
8
import egon.data.config
9
10
11
def download_osm_file():
12
    """Download OpenStreetMap `.pbf` file."""
13
    data_config = egon.data.config.datasets()
14
    osm_config = data_config["openstreetmap"]["original_data"]["osm"]
15
16
    if not os.path.isfile(osm_config["file"]):
17
        urlretrieve(osm_config["url"] + osm_config["file"], osm_config["file"])
18
19
20
def osm2postgres(num_processes=4, cache_size=4096):
21
    """Import OSM data from `.pbf` file (from Geofabrik) to Postgres DB.
22
23
    Parameters
24
    ----------
25
    num_processes : int, optional
26
        Number of parallel processes used for processing during data import
27
    cache_size: int, optional
28
        Memory used during data import
29
30
    """
31
    # Read database configuration from docker-compose.yml
32
    docker_db_config = db.credentials()
33
34
    # Get data set config
35
    data_config = egon.data.config.datasets()
36
    osm_config = data_config["openstreetmap"]["original_data"]["osm"]
37
38
    # Prepare osm2pgsql command
39
    cmd = [
40
        "osm2pgsql",
41
        "--create",
42
        "--slim",
43
        "--hstore-all",
44
        f"--number-processes {num_processes}",
45
        f"--cache {cache_size}",
46
        f"-H {docker_db_config['HOST']} -P {docker_db_config['PORT']} "
47
        f"-d {docker_db_config['POSTGRES_DB']} "
48
        f"-U {docker_db_config['POSTGRES_USER']}",
49
        f"-p {osm_config['table_prefix']}",
50
        f"-S {osm_config['stylefile']}",
51
        f"{osm_config['file']}",
52
    ]
53
54
    # Execute osm2pgsql for import OSM data
55
    subprocess.run(
56
        " ".join(cmd),
57
        shell=True,
58
        env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]},
59
        cwd=os.path.dirname(__file__),
60
    )
61
62
63
def post_import_modifications():
64
    """Adjust primary keys, indices and schema of OSM tables.
65
66
    * Column gid is introduced as new primary key
67
    * Indices (GIST, GIN) are reset
68
    * Tables are move to schema 'openstreemap'
69
    """
70
    # Replace indices and primary keys
71
    for table in [
72
        "osm_" + suffix for suffix in ["line", "point", "polygon", "roads"]
73
    ]:
74
75
        # Drop indices
76
        sql_statements = [f"DROP INDEX {table}_index;"]
77
78
        # Drop primary keys
79
        sql_statements.append(f"DROP INDEX {table}_pkey;")
80
81
        # Add primary key on newly created column "gid"
82
        sql_statements.append(f"ALTER TABLE public.{table} ADD gid SERIAL;")
83
        sql_statements.append(
84
            f"ALTER TABLE public.{table} ADD PRIMARY KEY (gid);"
85
        )
86
        sql_statements.append(
87
            f"ALTER TABLE public.{table} RENAME COLUMN way TO geom;"
88
        )
89
90
        # Add indices (GIST and GIN)
91
        sql_statements.append(
92
            f"CREATE INDEX {table}_geom_idx ON public.{table} "
93
            f"USING gist (geom);"
94
        )
95
        sql_statements.append(
96
            f"CREATE INDEX {table}_tags_idx ON public.{table} "
97
            f"USING GIN (tags);"
98
        )
99
100
        # Execute collected SQL statements
101
        for statement in sql_statements:
102
            db.execute_sql(statement)
103
104
    # Get data set config
105
    data_config = egon.data.config.datasets()["openstreetmap"][
106
        "original_data"
107
    ]["osm"]
108
109
    # Move table to schema "openstreetmap"
110
    db.execute_sql(
111
        f"CREATE SCHEMA IF NOT EXISTS {data_config['output_schema']};"
112
    )
113
114
    for out_table in data_config["output_tables"]:
115
        sql_statement = (
116
            f"ALTER TABLE public.{out_table} "
117
            f"SET SCHEMA {data_config['output_schema']};"
118
        )
119
120
        db.execute_sql(sql_statement)
121
122
123
def metadata():
124
    """Writes metadata JSON string into table comment."""
125
    # Prepare variables
126
    osm_config = egon.data.config.datasets()["openstreetmap"]["original_data"][
127
        "osm"
128
    ]
129
    spatial_and_date = os.path.basename(osm_config["file"]).split("-")
130
    spatial_extend = spatial_and_date[0]
131
    osm_data_date = (
132
        "20"
133
        + spatial_and_date[1][0:2]
134
        + "-"
135
        + spatial_and_date[1][2:4]
136
        + "-"
137
        + spatial_and_date[1][4:6]
138
    )
139
    osm_url = osm_config["url"]
140
141
    # Insert metadata for each table
142
    for table in osm_config["output_tables"]:
143
        table_suffix = table.split("_")[1]
144
        meta = {
145
            "title": f"OpenStreetMap (OSM) - Germany - {table_suffix}",
146
            "description": "OpenStreetMap is a free, editable map of the "
147
            "whole world that is being built by volunteers "
148
            "largely from scratch and released with "
149
            "an open-content license.",
150
            "language": ["EN", "DE"],
151
            "spatial": {
152
                "location": "",
153
                "extent": f"{spatial_extend}",
154
                "resolution": "",
155
            },
156
            "temporal": {
157
                "referenceDate": f"{osm_data_date}",
158
                "timeseries": {
159
                    "start": "",
160
                    "end": "",
161
                    "resolution": "",
162
                    "alignment": "",
163
                    "aggregationType": "",
164
                },
165
            },
166
            "sources": [
167
                {
168
                    "title": "Geofabrik - Download - OpenStreetMap Data "
169
                    "Extracts",
170
                    "description": "Data dump of reference date. Thereof, a "
171
                    "subset is selected using"
172
                    "osm2pgsql with oedb.style style file",
173
                    "path": f"{osm_url}",
174
                    "licenses": [
175
                        {
176
                            "name": "Open Data Commons Open Database "
177
                            "License 1.0",
178
                            "title": "",
179
                            "path": "https://opendatacommons.org/licenses/"
180
                            "odbl/1.0/",
181
                            "instruction": "You are free: To Share, To "
182
                            "Create, To Adapt; As long as you: "
183
                            "Attribute, Share-Alike, Keep "
184
                            "open!",
185
                            "attribution": "© Reiner Lemoine Institut",
186
                        }
187
                    ],
188
                }
189
            ],
190
            "licenses": [
191
                {
192
                    "name": "Open Data Commons Open Database License 1.0",
193
                    "title": "",
194
                    "path": "https://opendatacommons.org/licenses/odbl/1.0/",
195
                    "instruction": "You are free: To Share, To Create, To "
196
                    "Adapt; As long as you: Attribute, "
197
                    "Share-Alike, Keep open!",
198
                    "attribution": "© Reiner Lemoine Institut",
199
                }
200
            ],
201
            "contributors": [
202
                {
203
                    "title": "Guido Pleßmann",
204
                    "email": "http://github.com/gplssm",
205
                    "date": time.strftime("%Y-%m-%d"),
206
                    "object": "",
207
                    "comment": "Imported data",
208
                }
209
            ],
210
            "metaMetadata": {
211
                "metadataVersion": "OEP-1.4.0",
212
                "metadataLicense": {
213
                    "name": "CC0-1.0",
214
                    "title": "Creative Commons Zero v1.0 Universal",
215
                    "path": "https://creativecommons.org/publicdomain/"
216
                    "zero/1.0/",
217
                },
218
            },
219
        }
220
221
        meta_json = "'" + json.dumps(meta) + "'"
222
223
        db.submit_comment(meta_json, "openstreetmap", table)
224