Passed · Pull Request — dev (#943)
created by unknown · 01:47

motorized_individual_travel_charging_infrastructure.infrastructure_allocation (Rating: A)

Complexity

Total Complexity 11

Size/Duplication

Total Lines 215
Duplicated Lines 0 %

Importance

Changes 0
Metric   Value
wmc      11
eloc     130
dl       0
loc      215
rs       10
c        0
b        0
f        0

5 Functions

Rating   Name                      Duplication   Size   Complexity
B        get_data()                0             113    3
A        run_tracbev_potential()   0             9      2
A        run_use_cases()           0             22     1
A        run_tracbev()             0             4      1
A        write_to_db()             0             36     4
from __future__ import annotations

from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd

from egon.data import config, db
from egon.data.datasets.emobility.motorized_individual_travel_charging_infrastructure.use_cases import (  # noqa: E501
    home,
    hpc,
    public,
    work,
)

WORKING_DIR = Path(".", "charging_infrastructure").resolve()
DATASET_CFG = config.datasets()["charging_infrastructure"]


def write_to_db(gdf: gpd.GeoDataFrame, mv_grid_id: int | float, use_case: str):
    # nothing to export for this grid district and use case
    if gdf.empty:
        return

    # weight each charge point by its share of the energy demand if available,
    # otherwise by normalised random integers drawn from the configured seed
    if "energy" in gdf.columns:
        gdf = gdf.assign(weight=gdf.energy.div(gdf.energy.sum()))
    else:
        rng = np.random.default_rng(DATASET_CFG["constants"]["random_seed"])

        gdf = gdf.assign(weight=rng.integers(low=0, high=100, size=len(gdf)))

        gdf = gdf.assign(weight=gdf.weight.div(gdf.weight.sum()))

    # number new charge points based on the current maximum cp_id in the
    # target table
    max_id = db.select_dataframe(
        """
        SELECT MAX(cp_id) FROM grid.egon_emob_charging_infrastructure
        """
    )["max"][0]

    if max_id is None:
        max_id = 0

    gdf = gdf.assign(
        cp_id=range(max_id, max_id + len(gdf)),
        mv_grid_id=mv_grid_id,
        use_case=use_case,
    )

    targets = DATASET_CFG["targets"]
    cols_to_export = targets["charging_infrastructure"]["cols_to_export"]

    # append the configured columns to the target table
    gpd.GeoDataFrame(gdf[cols_to_export], crs=gdf.crs).to_postgis(
        targets["charging_infrastructure"]["table"],
        schema=targets["charging_infrastructure"]["schema"],
        con=db.engine(),
        if_exists="append",
    )


def run_tracbev():
    # collect all input data and run the allocation per MV grid district
    data_dict = get_data()

    run_tracbev_potential(data_dict)


def run_tracbev_potential(data_dict):
    bounds = data_dict["boundaries"]

    for mv_grid_id in data_dict["regions"].mv_grid_id:
        region = bounds.loc[bounds.bus_id == mv_grid_id].geom

        data_dict.update({"region": region, "key": mv_grid_id})
        # start use cases for this grid district
        run_use_cases(data_dict)


def run_use_cases(data_dict):
    # allocate charging infrastructure per use case and write it to the DB
    write_to_db(
        hpc(data_dict["hpc_positions"], data_dict),
        data_dict["key"],
        use_case="hpc",
    )
    write_to_db(
        public(
            data_dict["public_positions"], data_dict["poi_cluster"], data_dict
        ),
        data_dict["key"],
        use_case="public",
    )
    write_to_db(
        work(data_dict["landuse"], data_dict["work_dict"], data_dict),
        data_dict["key"],
        use_case="work",
    )
    write_to_db(
        home(data_dict["housing_data"], data_dict),
        data_dict["key"],
        use_case="home",
    )


def get_data() -> dict[gpd.GeoDataFrame]:
    tracbev_cfg = DATASET_CFG["original_data"]["sources"]["tracbev"]
    srid = tracbev_cfg["srid"]

    # TODO: get zensus housing data from DB instead of gpkg?
    files = tracbev_cfg["files_to_use"]

    data_dict = {}

    # get TracBEV files and harmonise their CRS
    for f in files:
        file = WORKING_DIR / "data" / f
        name = f.split(".")[0]

        data_dict[name] = gpd.read_file(file)

        if "undefined" in data_dict[name].crs.name.lower():
            data_dict[name] = data_dict[name].set_crs(
                epsg=srid, allow_override=True
            )
        else:
            data_dict[name] = data_dict[name].to_crs(epsg=srid)

    # get housing data from DB
    sql = """
    SELECT building_id, cell_id
    FROM demand.egon_household_electricity_profile_of_buildings
    """

    df = db.select_dataframe(sql)

    # count household profiles per building to classify buildings as
    # single-family (one profile) or multi-family (several profiles) and
    # count each type per census cell
    count_df = (
        df.groupby(["building_id", "cell_id"])
        .size()
        .reset_index()
        .rename(columns={0: "count"})
    )

    mfh_df = (
        count_df.loc[count_df["count"] > 1]
        .groupby(["cell_id"])
        .size()
        .reset_index()
        .rename(columns={0: "num_mfh"})
    )
    efh_df = (
        count_df.loc[count_df["count"] <= 1]
        .groupby(["cell_id"])
        .size()
        .reset_index()
        .rename(columns={0: "num"})
    )

    comb_df = (
        mfh_df.merge(
            right=efh_df, how="outer", left_on="cell_id", right_on="cell_id"
        )
        .fillna(0)
        .astype(int)
    )

    sql = """
    SELECT zensus_population_id, geom as geometry
    FROM society.egon_destatis_zensus_apartment_building_population_per_ha
    """

    gdf = db.select_geodataframe(sql, geom_col="geometry", epsg=srid)

    data_dict["housing_data"] = gpd.GeoDataFrame(
        gdf.merge(
            right=comb_df, left_on="zensus_population_id", right_on="cell_id"
        ),
        crs=gdf.crs,
    ).drop(columns=["cell_id"])

    # get boundaries aka grid districts
    sql = """
    SELECT bus_id, geom FROM grid.egon_mv_grid_district
    """

    data_dict["boundaries"] = db.select_geodataframe(
        sql, geom_col="geom", epsg=srid
    )

    data_dict["regions"] = pd.DataFrame(
        columns=["mv_grid_id"],
        data=data_dict["boundaries"].bus_id.unique(),
    )

    # weighting factors for the work use case per land use type
    data_dict["work_dict"] = {
        "retail": DATASET_CFG["constants"]["work_weight_retail"],
        "commercial": DATASET_CFG["constants"]["work_weight_commercial"],
        "industrial": DATASET_CFG["constants"]["work_weight_industrial"],
    }

    # configured shares and average numbers of charging spots for single- and
    # multi-family homes
    data_dict["sfh_available"] = DATASET_CFG["constants"][
        "single_family_home_share"
    ]
    data_dict["sfh_avg_spots"] = DATASET_CFG["constants"][
        "single_family_home_spots"
    ]
    data_dict["mfh_available"] = DATASET_CFG["constants"][
        "multi_family_home_share"
    ]
    data_dict["mfh_avg_spots"] = DATASET_CFG["constants"][
        "multi_family_home_spots"
    ]

    # seeded random number generator shared by the use cases
    data_dict["random_seed"] = np.random.default_rng(
        DATASET_CFG["constants"]["random_seed"]
    )

    return data_dict
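
As a quick reference for how write_to_db() derives the weight column: charge points that carry an energy value are weighted by their share of the total energy demand, otherwise random integers drawn from the configured seed are normalised to sum to one. The standalone sketch below reproduces that arithmetic on made-up toy data (coordinates, demand values, CRS and seed are assumptions for illustration only, not values from the pipeline); in the pipeline itself the module is driven via run_tracbev().

import geopandas as gpd
import numpy as np

# Toy charge-point candidates; coordinates and energy demands are made up.
toy = gpd.GeoDataFrame(
    {"energy": [2.0, 6.0, 8.0]},
    geometry=gpd.points_from_xy([0.0, 1.0, 2.0], [0.0, 0.0, 0.0]),
    crs="EPSG:3035",
)

# Case 1: an "energy" column is present -> weights are energy shares.
with_energy = toy.assign(weight=toy.energy.div(toy.energy.sum()))
print(with_energy.weight.tolist())  # [0.125, 0.375, 0.5]

# Case 2: no "energy" column -> random integers from a seeded generator,
# normalised so that the weights again sum to one.
rng = np.random.default_rng(42)  # seed value is an assumption
draws = rng.integers(low=0, high=100, size=len(toy))
without_energy = toy.drop(columns="energy").assign(weight=draws / draws.sum())
print(without_energy.weight.sum())  # sums to 1 (up to float rounding)

Normalising to a weight column rather than exporting raw demand keeps the charge points directly usable as a sampling distribution, which is presumably how downstream steps consume them.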