Passed
Pull Request — dev (#968)
by
unknown
01:51
created

heavy_duty_transport.data_io.bast_gdf()   A

Complexity

Conditions 1

Size

Total Lines 34
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 23
dl 0
loc 34
rs 9.328
c 0
b 0
f 0
cc 1
nop 0
1
"""
2
Read data from DB and downloads
3
"""
4
from pathlib import Path
5
6
from loguru import logger
7
import geopandas as gpd
8
import pandas as pd
9
10
from egon.data import config
11
from egon.data.db import select_geodataframe
12
13
# Configuration section for the "mobility_hgv" dataset.
DATASET_CFG = config.datasets()["mobility_hgv"]
# Absolute path of the "heavy_duty_transport" directory, resolved against the
# working directory at import time.
WORKING_DIR = Path("heavy_duty_transport").resolve()
# True when the configured dataset boundary is "Everything"; boundary_gdf()
# then loads the Germany GeoJSON instead of the NUTS shape file subset.
TESTMODE_OFF = (
    config.settings()["egon-data"]["--dataset-boundary"] == "Everything"
)
18
19
20
def get_data():
    """
    Collect all input data sets of this module.

    Returns
    -------
    tuple
        The results of ``boundary_gdf()``, ``bast_gdf()`` and ``nuts3_gdf()``,
        in that order.
    """
    boundary = boundary_gdf()
    bast = bast_gdf()
    nuts3 = nuts3_gdf()

    return boundary, bast, nuts3
25
26
27
def boundary_gdf():
    """
    Load the boundary geometry in the configured target SRID.

    When ``TESTMODE_OFF`` is true, the geometry is read from the URL configured
    under ``sources["germany"]["url"]``. Otherwise the NUTS shape file is read
    from the working directory, filtered to the rows whose ``NUTS_CODE``
    equals the configured value, and dissolved into a single geometry.

    Returns
    -------
    geopandas.GeoDataFrame
        Boundary geometry reprojected to ``DATASET_CFG["tables"]["srid"]``.
    """
    sources = DATASET_CFG["original_data"]["sources"]
    srid = DATASET_CFG["tables"]["srid"]

    if TESTMODE_OFF:
        germany = gpd.read_file(sources["germany"]["url"]).to_crs(epsg=srid)
        logger.debug("Downloaded germany GeoJSON.")

        return germany

    nuts_cfg = sources["NUTS"]

    # The shape file sits in a directory named after the source file: the
    # last dot-separated part is dropped and the remaining parts are joined
    # with underscores.
    *stem_parts, _extension = nuts_cfg["file"].split(".")
    shape_path = WORKING_DIR / "_".join(stem_parts) / nuts_cfg["shp_file"]

    regions = gpd.read_file(shape_path).to_crs(epsg=srid)

    in_region = regions["NUTS_CODE"] == nuts_cfg["NUTS_CODE"]
    region = regions.loc[in_region].dissolve()

    logger.debug("Loaded SH shape file.")

    return region
52
53
54
def bast_gdf():
    """
    Read the BAST CSV file and turn it into a point GeoDataFrame.

    Only the configured ``relevant_columns`` are read. The first of them is
    kept as the data column; the second and third are used as x and y
    coordinates of the point geometries.

    Returns
    -------
    geopandas.GeoDataFrame
        One point per CSV row, reprojected from the BAST source SRID to
        ``DATASET_CFG["tables"]["srid"]``.
    """
    bast_cfg = DATASET_CFG["original_data"]["sources"]["BAST"]
    csv_path = WORKING_DIR / bast_cfg["file"]
    columns = bast_cfg["relevant_columns"]

    # NOTE(review): the field delimiter and the decimal mark are both ",";
    # confirm this matches the layout of the BAST CSV file.
    table = pd.read_csv(
        csv_path,
        delimiter=",",
        decimal=",",
        thousands=".",
        encoding="ISO-8859-1",
        usecols=columns,
    )

    init_srid = bast_cfg["srid"]
    final_srid = DATASET_CFG["tables"]["srid"]

    points = gpd.points_from_xy(
        table[columns[1]],
        table[columns[2]],
        crs=f"EPSG:{init_srid}",
    )
    stations = gpd.GeoDataFrame(table[columns[0]], geometry=points)
    stations = stations.to_crs(epsg=final_srid)

    logger.debug("Read in BAST data.")

    return stations
88
89
90
def nuts3_gdf():
    """
    Load the NUTS 3 district geometries from the database.

    Selects the rows of ``boundaries.vg250_krs`` with ``gf = 4``, ordered by
    and indexed on the NUTS code, and reprojects them to the configured SRID.

    Returns
    -------
    geopandas.GeoDataFrame
        Districts indexed by ``nuts3`` with an additional ``area`` column
        computed from the reprojected geometries.
    """
    target_srid = DATASET_CFG["tables"]["srid"]
    query = """
        SELECT nuts as nuts3, geometry FROM boundaries.vg250_krs
        WHERE gf = 4
        ORDER BY nuts
        """

    districts = select_geodataframe(
        query, geom_col="geometry", index_col="nuts3"
    )
    districts = districts.to_crs(epsg=target_srid)

    # Area is taken after reprojection, i.e. from the target-CRS geometries.
    districts["area"] = districts.geometry.area

    logger.debug("Read in NUTS 3 districts.")

    return districts
108