Passed
Pull Request — dev (#31)
by
unknown
54s
created

import_osm.post_import_modifications()   A

Complexity

Conditions 4

Size

Total Lines 59
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 59
rs 9.28
c 0
b 0
f 0
cc 4
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
from urllib.request import urlretrieve
2
import json
3
import os
4
import subprocess
5
import time
6
7
from egon.data import db
8
import egon.data.config
9
10
11
def download_osm_file():
    """Fetch the OpenStreetMap `.pbf` file unless it already exists.

    File name and base URL are taken from the
    ``openstreetmap -> original_data -> osm`` section of the data set
    configuration. The download URL is the configured ``url`` with the
    configured ``file`` appended, and the result is stored under ``file``.
    """
    settings = egon.data.config.datasets()["openstreetmap"]["original_data"][
        "osm"
    ]
    target = settings["file"]

    # A file of that name is already present: skip the download.
    if os.path.isfile(target):
        return

    urlretrieve(settings["url"] + target, target)
18
19
20
def osm2postgres(num_processes=4, cache_size=4096):
    """
    Import OSM data from `.pbf` file (from Geofabrik) to Postgres DB.

    The import is delegated to the external ``osm2pgsql`` program, which is
    started with the directory of this module as working directory. The
    database password is handed over through the ``PGPASSWORD`` environment
    variable.

    Parameters
    ----------
    num_processes : int, optional
        Number of parallel processes used for processing during data import
    cache_size: int, optional
        Memory used during data import

    Raises
    ------
    subprocess.CalledProcessError
        If ``osm2pgsql`` exits with a non-zero status.

    """
    # Read database credentials (host, port, database, user, password)
    docker_db_config = db.credentials()

    # Get data set config
    data_config = egon.data.config.datasets()
    osm_config = data_config["openstreetmap"]["original_data"]["osm"]

    # Prepare osm2pgsql command as an argument list. Every option and value
    # is its own element so no shell is needed and configuration values are
    # never interpreted by one.
    cmd = [
        "osm2pgsql",
        "--create",
        "--slim",
        "--hstore-all",
        "--number-processes",
        str(num_processes),
        "--cache",
        str(cache_size),
        "-H",
        str(docker_db_config["HOST"]),
        "-P",
        str(docker_db_config["PORT"]),
        "-d",
        str(docker_db_config["POSTGRES_DB"]),
        "-U",
        str(docker_db_config["POSTGRES_USER"]),
        "-p",
        str(osm_config["table_prefix"]),
        "-S",
        str(osm_config["stylefile"]),
        str(osm_config["file"]),
    ]

    # Extend (instead of replace) the current environment so the child
    # process still sees PATH and friends in addition to the password.
    env = {**os.environ, "PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]}

    # Execute osm2pgsql for import OSM data; check=True turns a failed import
    # into an exception instead of letting it pass silently.
    subprocess.run(
        cmd,
        env=env,
        cwd=os.path.dirname(__file__),
        check=True,
    )
62
63
64
def _key_and_index_statements(table):
    """Return the ordered SQL statements that re-key and re-index `table`."""
    return [
        # Drop indices
        f"DROP INDEX {table}_index;",
        # Drop primary keys (removed via the index named "<table>_pkey")
        f"DROP INDEX {table}_pkey;",
        # Add primary key on newly created column "gid"
        f"ALTER TABLE public.{table} ADD gid SERIAL;",
        f"ALTER TABLE public.{table} ADD PRIMARY KEY (gid);",
        # Geometry column is renamed before the new GIST index refers to it
        f"ALTER TABLE public.{table} RENAME COLUMN way TO geom;",
        # Add indices (GIST and GIN)
        f"CREATE INDEX {table}_geom_idx ON public.{table} "
        f"USING gist (geom);",
        f"CREATE INDEX {table}_tags_idx ON public.{table} "
        f"USING GIN (tags);",
    ]


def _move_tables_to_schema(osm_config):
    """Create the configured output schema and move the output tables to it."""
    db.execute_sql(
        f"CREATE SCHEMA IF NOT EXISTS {osm_config['output_schema']};"
    )

    for out_table in osm_config["output_tables"]:
        db.execute_sql(
            f"ALTER TABLE public.{out_table} "
            f"SET SCHEMA {osm_config['output_schema']};"
        )


def post_import_modifications():
    """
    Adjust primary keys, indices and schema of OSM tables.

    * Column gid is introduced as new primary key
    * Column way is renamed to geom
    * Indices (GIST, GIN) are reset
    * Tables are moved to the schema given by ``output_schema`` in the
      data set configuration

    The first three steps are applied to the tables ``osm_line``,
    ``osm_point``, ``osm_polygon`` and ``osm_roads`` in schema ``public``;
    the last step is applied to the configured ``output_tables``.
    """
    # Replace indices and primary keys, one statement at a time
    for table in [
        "osm_" + suffix for suffix in ["line", "point", "polygon", "roads"]
    ]:
        for statement in _key_and_index_statements(table):
            db.execute_sql(statement)

    # Get data set config
    osm_config = egon.data.config.datasets()["openstreetmap"][
        "original_data"
    ]["osm"]

    # Move tables to the configured output schema
    _move_tables_to_schema(osm_config)
123
124
125
def _osm_table_metadata(
    table_suffix, spatial_extend, osm_data_date, osm_url, today
):
    """Build the metadata dictionary written as comment on one OSM table."""
    return {
        "title": f"OpenStreetMap (OSM) - Germany - {table_suffix}",
        "description": "OpenStreetMap is a free, editable map of the "
        "whole world that is being built by volunteers "
        "largely from scratch and released with "
        "an open-content license.",
        "language": ["EN", "DE"],
        "spatial": {
            "location": "",
            "extent": spatial_extend,
            "resolution": "",
        },
        "temporal": {
            "referenceDate": osm_data_date,
            "timeseries": {
                "start": "",
                "end": "",
                "resolution": "",
                "alignment": "",
                "aggregationType": "",
            },
        },
        "sources": [
            {
                "title": "Geofabrik - Download - OpenStreetMap Data "
                "Extracts",
                # Trailing space after "using" keeps the adjacent literals
                # from running together as "usingosm2pgsql".
                "description": "Data dump of reference date. Thereof, a "
                "subset is selected using "
                "osm2pgsql with oedb.style style file",
                "path": f"{osm_url}",
                "licenses": [
                    {
                        "name": "Open Data Commons Open Database "
                        "License 1.0",
                        "title": "",
                        "path": "https://opendatacommons.org/licenses/"
                        "odbl/1.0/",
                        "instruction": "You are free: To Share, To "
                        "Create, To Adapt; As long as you: "
                        "Attribute, Share-Alike, Keep "
                        "open!",
                        "attribution": "© Reiner Lemoine Institut",
                    }
                ],
            }
        ],
        "licenses": [
            {
                "name": "Open Data Commons Open Database License 1.0",
                "title": "",
                "path": "https://opendatacommons.org/licenses/odbl/1.0/",
                "instruction": "You are free: To Share, To Create, To "
                "Adapt; As long as you: Attribute, "
                "Share-Alike, Keep open!",
                "attribution": "© Reiner Lemoine Institut",
            }
        ],
        "contributors": [
            {
                "title": "Guido Pleßmann",
                "email": "http://github.com/gplssm",
                "date": today,
                "object": "",
                "comment": "Imported data",
            }
        ],
        "metaMetadata": {
            "metadataVersion": "OEP-1.4.0",
            "metadataLicense": {
                "name": "CC0-1.0",
                "title": "Creative Commons Zero v1.0 Universal",
                "path": "https://creativecommons.org/publicdomain/"
                "zero/1.0/",
            },
        },
    }


def metadata():
    """Writes metadata JSON string into table comment.

    One comment is submitted for every table listed under ``output_tables``
    in the ``openstreetmap -> original_data -> osm`` configuration section.
    Spatial extent and reference date are derived from the configured file
    name.
    """
    # Prepare variables
    osm_config = egon.data.config.datasets()["openstreetmap"]["original_data"][
        "osm"
    ]

    # The base name is split on "-": the first part is used as extent, the
    # first six characters of the second part as date digits.
    # NOTE(review): assumes a name like "<extent>-<yymmdd>..." -- confirm.
    spatial_and_date = os.path.basename(osm_config["file"]).split("-")
    spatial_extend = spatial_and_date[0]
    date_digits = spatial_and_date[1]
    osm_data_date = (
        f"20{date_digits[0:2]}-{date_digits[2:4]}-{date_digits[4:6]}"
    )
    osm_url = osm_config["url"]

    # Same for all tables, so determined once up front
    today = time.strftime("%Y-%m-%d")
    # Use the schema the tables were moved to rather than a fixed name
    schema = osm_config["output_schema"]

    # Insert metadata for each table
    for table in osm_config["output_tables"]:
        # Second "_"-separated part of the table name, e.g. "osm_line" ->
        # "line"
        table_suffix = table.split("_")[1]
        meta = _osm_table_metadata(
            table_suffix, spatial_extend, osm_data_date, osm_url, today
        )

        # JSON is wrapped in single quotes before submission.
        # NOTE(review): presumably embedded as SQL string literal by
        # db.submit_comment; a single quote inside the JSON would then
        # break the statement -- confirm.
        meta_json = "'" + json.dumps(meta) + "'"

        db.submit_comment(meta_json, schema, table)
226