Passed
Pull Request — dev (#31)
by Stephan
01:04
created

import_osm   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 220
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 9
eloc 120
dl 0
loc 220
rs 10
c 0
b 0
f 0

4 Functions

Rating   Name   Duplication   Size   Complexity  
A osm2postgres() 0 40 1
A download_osm_file() 0 7 2
A post_import_modifications() 0 58 4
B metadata() 0 97 2
1
from urllib.request import urlretrieve
2
import json
3
import os
4
import subprocess
5
import time
6
7
from egon.data import db
8
import egon.data.config
9
10
11
def download_osm_file():
12
    """Download OpenStreetMap `.pbf` file."""
13
    data_config = egon.data.config.datasets()
14
    osm_config = data_config["openstreetmap"]["original_data"]["osm"]
15
16
    if not os.path.isfile(osm_config["file"]):
17
        urlretrieve(osm_config["url"] + osm_config["file"], osm_config["file"])
18
19
20
def osm2postgres(num_processes=4, cache_size=4096):
21
    """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database.
22
23
    Parameters
24
    ----------
25
    num_processes : int, optional
26
        Number of parallel processes used for processing during data import
27
    cache_size: int, optional
28
        Memory used during data import
29
30
    """
31
    # Read database configuration from docker-compose.yml
32
    docker_db_config = db.credentials()
33
34
    # Get dataset config
35
    data_config = egon.data.config.datasets()
36
    osm_config = data_config["openstreetmap"]["original_data"]["osm"]
37
38
    # Prepare osm2pgsql command
39
    cmd = [
40
        "osm2pgsql",
41
        "--create",
42
        "--slim",
43
        "--hstore-all",
44
        f"--number-processes {num_processes}",
45
        f"--cache {cache_size}",
46
        f"-H {docker_db_config['HOST']} -P {docker_db_config['PORT']} "
47
        f"-d {docker_db_config['POSTGRES_DB']} "
48
        f"-U {docker_db_config['POSTGRES_USER']}",
49
        f"-p {osm_config['table_prefix']}",
50
        f"-S {osm_config['stylefile']}",
51
        f"{osm_config['file']}",
52
    ]
53
54
    # Execute osm2pgsql for import OSM data
55
    subprocess.run(
56
        " ".join(cmd),
57
        shell=True,
58
        env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]},
59
        cwd=os.path.dirname(__file__),
60
    )
61
62
63
def post_import_modifications():
64
    """Adjust primary keys, indices and schema of OSM tables.
65
66
    * The Column "gid" is added and used as the new primary key.
67
    * Indices (GIST, GIN) are reset
68
    * The tables are moved to the schema configured as the "output_schema".
69
    """
70
    # Replace indices and primary keys
71
    for table in [
72
        "osm_" + suffix for suffix in ["line", "point", "polygon", "roads"]
73
    ]:
74
75
        # Drop indices
76
        sql_statements = [f"DROP INDEX {table}_index;"]
77
78
        # Drop primary keys
79
        sql_statements.append(f"DROP INDEX {table}_pkey;")
80
81
        # Add primary key on newly created column "gid"
82
        sql_statements.append(f"ALTER TABLE public.{table} ADD gid SERIAL;")
83
        sql_statements.append(
84
            f"ALTER TABLE public.{table} ADD PRIMARY KEY (gid);"
85
        )
86
        sql_statements.append(
87
            f"ALTER TABLE public.{table} RENAME COLUMN way TO geom;"
88
        )
89
90
        # Add indices (GIST and GIN)
91
        sql_statements.append(
92
            f"CREATE INDEX {table}_geom_idx ON public.{table} "
93
            f"USING gist (geom);"
94
        )
95
        sql_statements.append(
96
            f"CREATE INDEX {table}_tags_idx ON public.{table} "
97
            f"USING GIN (tags);"
98
        )
99
100
        # Execute collected SQL statements
101
        for statement in sql_statements:
102
            db.execute_sql(statement)
103
104
    # Get dataset config
105
    data_config = egon.data.config.datasets()["openstreetmap"][
106
        "original_data"
107
    ]["osm"]
108
109
    # Move table to schema "openstreetmap"
110
    db.execute_sql(
111
        f"CREATE SCHEMA IF NOT EXISTS {data_config['output_schema']};"
112
    )
113
114
    for out_table in data_config["output_tables"]:
115
        sql_statement = (
116
            f"ALTER TABLE public.{out_table} "
117
            f"SET SCHEMA {data_config['output_schema']};"
118
        )
119
120
        db.execute_sql(sql_statement)
121
122
123
def metadata():
124
    """Writes metadata JSON string into table comment."""
125
    # Prepare variables
126
    osm_config = egon.data.config.datasets()["openstreetmap"]["original_data"][
127
        "osm"
128
    ]
129
    spatial_and_date = os.path.basename(osm_config["file"]).split("-")
130
    spatial_extend = spatial_and_date[0]
131
    osm_data_date = (
132
        "20"
133
        + spatial_and_date[1][0:2]
134
        + "-"
135
        + spatial_and_date[1][2:4]
136
        + "-"
137
        + spatial_and_date[1][4:6]
138
    )
139
    osm_url = osm_config["url"]
140
141
    # Insert metadata for each table
142
    licenses = [
143
        {
144
            "name": "Open Data Commons Open Database License 1.0",
145
            "title": "",
146
            "path": "https://opendatacommons.org/licenses/odbl/1.0/",
147
            "instruction": (
148
                "You are free: To Share, To Create, To Adapt;"
149
                " As long as you: Attribute, Share-Alike, Keep open!"
150
            ),
151
            "attribution": "© Reiner Lemoine Institut",
152
        }
153
    ]
154
    for table in osm_config["output_tables"]:
155
        table_suffix = table.split("_")[1]
156
        meta = {
157
            "title": f"OpenStreetMap (OSM) - Germany - {table_suffix}",
158
            "description": (
159
                "OpenStreetMap is a free, editable map of the"
160
                " whole world that is being built by volunteers"
161
                " largely from scratch and released with"
162
                " an open-content license."
163
            ),
164
            "language": ["EN", "DE"],
165
            "spatial": {
166
                "location": "",
167
                "extent": f"{spatial_extend}",
168
                "resolution": "",
169
            },
170
            "temporal": {
171
                "referenceDate": f"{osm_data_date}",
172
                "timeseries": {
173
                    "start": "",
174
                    "end": "",
175
                    "resolution": "",
176
                    "alignment": "",
177
                    "aggregationType": "",
178
                },
179
            },
180
            "sources": [
181
                {
182
                    "title": (
183
                        "Geofabrik - Download - OpenStreetMap Data Extracts"
184
                    ),
185
                    "description": (
186
                        'Data dump taken on "referenceDate",'
187
                        f" i.e. {osm_data_date}."
188
                        " A subset of this is selected using osm2pgsql"
189
                        ' using the style file "oedb.style".'
190
                    ),
191
                    "path": f"{osm_url}",
192
                    "licenses": licenses,
193
                }
194
            ],
195
            "licenses": licenses,
196
            "contributors": [
197
                {
198
                    "title": "Guido Pleßmann",
199
                    "email": "http://github.com/gplssm",
200
                    "date": time.strftime("%Y-%m-%d"),
201
                    "object": "",
202
                    "comment": "Imported data",
203
                }
204
            ],
205
            "metaMetadata": {
206
                "metadataVersion": "OEP-1.4.0",
207
                "metadataLicense": {
208
                    "name": "CC0-1.0",
209
                    "title": "Creative Commons Zero v1.0 Universal",
210
                    "path": (
211
                        "https://creativecommons.org/publicdomain/zero/1.0/"
212
                    ),
213
                },
214
            },
215
        }
216
217
        meta_json = "'" + json.dumps(meta) + "'"
218
219
        db.submit_comment(meta_json, "openstreetmap", table)
220