Completed: Push on dev (85d654...312d71) by unknown · 21s (queued 16s)

Grade: D

Complexity
    Conditions: 12

Size
    Total Lines: 62
    Code Lines: 29

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 0

Metric    Value
eloc      29
dl        0
loc       62
rs        4.8
c         0
b         0
f         0
cc        12
nop       2

How to fix

Long Method

Small methods make your code easier to understand, especially when combined with a good name. Moreover, when a method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.

Commonly applied refactorings include Extract Method (sketched below), Replace Temp with Query, and Decompose Conditional.
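As a minimal sketch of Extract Method (load_trips and rename_to_internal_columns are illustrative names, not functions from this module; only TRIP_COLUMN_MAPPING is taken from the listing below):

import pandas as pd

TRIP_COLUMN_MAPPING = {"chargingdemand_kWh": "charging_demand"}  # excerpt only

# Before: a comment explains what the block does.
def load_trips(path):
    trips = pd.read_csv(path)
    # rename raw columns to the internal naming scheme
    trips = trips.rename(columns=TRIP_COLUMN_MAPPING)
    return trips

# After: the comment has become the name of a small, extracted method.
def rename_to_internal_columns(trips):
    return trips.rename(columns=TRIP_COLUMN_MAPPING)

def load_trips(path):
    return rename_to_internal_columns(pd.read_csv(path))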

Complexity

Complex code units like motorized_individual_travel.helpers.reduce_mem_usage() often do several different things at once. To break such a unit down, identify a cohesive component within it; a common way to find one is to look for fields or methods that share the same prefixes or suffixes.

Once you have determined which pieces belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often quicker to apply.
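Applied to a stand-alone function like this one, the analogue of Extract Class is pulling the nested dtype checks, which drive the cyclomatic complexity of 12, into small helpers. A minimal sketch (the helper names _downcast_integer and _downcast_float are hypothetical, not part of the module):

import numpy as np
import pandas as pd


def _downcast_integer(series: pd.Series) -> pd.Series:
    """Cast to the smallest integer type that holds the value range."""
    c_min, c_max = series.min(), series.max()
    if np.iinfo(np.int16).min < c_min and c_max < np.iinfo(np.int16).max:
        return series.astype("int16")
    if np.iinfo(np.int32).min < c_min and c_max < np.iinfo(np.int32).max:
        return series.astype("int32")
    return series.astype("int64")


def _downcast_float(series: pd.Series) -> pd.Series:
    """Cast to float32 when the value range fits, else keep float64."""
    c_min, c_max = series.min(), series.max()
    if np.finfo(np.float32).min < c_min and c_max < np.finfo(np.float32).max:
        return series.astype("float32")
    return series.astype("float64")


# The loop body in reduce_mem_usage() then only dispatches:
#     if col_type != object and str(col_type) != "category":
#         if str(col_type)[:3] == "int":
#             df[col] = _downcast_integer(df[col])
#         else:
#             df[col] = _downcast_float(df[col])
#     else:
#         df[col] = df[col].astype("category")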

Source listing (motorized_individual_travel.helpers):

"""
Helpers: constants and functions for motorized individual travel
"""

from pathlib import Path
import json

import numpy as np
import pandas as pd

import egon.data.config

TESTMODE_OFF = (
    egon.data.config.settings()["egon-data"]["--dataset-boundary"]
    == "Everything"
)
WORKING_DIR = Path(".", "emobility")
DATA_BUNDLE_DIR = Path(
    ".",
    "data_bundle_egon_data",
    "emobility",
)
DATASET_CFG = egon.data.config.datasets()["emobility_mit"]
COLUMNS_KBA = [
    "reg_district",
    "total",
    "mini",
    "medium",
    "luxury",
    "unknown",
]
CONFIG_EV = {
    "bev_mini": {
        "column": "mini",
        "tech_share": "bev_mini_share",
        "share": "mini_share",
        "factor": "mini_factor",
    },
    "bev_medium": {
        "column": "medium",
        "tech_share": "bev_medium_share",
        "share": "medium_share",
        "factor": "medium_factor",
    },
    "bev_luxury": {
        "column": "luxury",
        "tech_share": "bev_luxury_share",
        "share": "luxury_share",
        "factor": "luxury_factor",
    },
    "phev_mini": {
        "column": "mini",
        "tech_share": "phev_mini_share",
        "share": "mini_share",
        "factor": "mini_factor",
    },
    "phev_medium": {
        "column": "medium",
        "tech_share": "phev_medium_share",
        "share": "medium_share",
        "factor": "medium_factor",
    },
    "phev_luxury": {
        "column": "luxury",
        "tech_share": "phev_luxury_share",
        "share": "luxury_share",
        "factor": "luxury_factor",
    },
}
TRIP_COLUMN_MAPPING = {
    "location": "location",
    "use_case": "use_case",
    "nominal_charging_capacity_kW": "charging_capacity_nominal",
    "grid_charging_capacity_kW": "charging_capacity_grid",
    "battery_charging_capacity_kW": "charging_capacity_battery",
    "soc_start": "soc_start",
    "soc_end": "soc_end",
    "chargingdemand_kWh": "charging_demand",
    "park_start_timesteps": "park_start",
    "park_end_timesteps": "park_end",
    "drive_start_timesteps": "drive_start",
    "drive_end_timesteps": "drive_end",
    "consumption_kWh": "consumption",
}
MVGD_MIN_COUNT = 3700 if TESTMODE_OFF else 150


def read_kba_data():
    """Read KBA data from CSV"""
    return pd.read_csv(
        WORKING_DIR
        / egon.data.config.datasets()["emobility_mit"]["original_data"][
            "sources"
        ]["KBA"]["file_processed"]
    )


def read_rs7_data():
    """Read RegioStaR7 data from CSV"""
    return pd.read_csv(
        WORKING_DIR
        / egon.data.config.datasets()["emobility_mit"]["original_data"][
            "sources"
        ]["RS7"]["file_processed"]
    )


def read_simbev_metadata_file(scenario_name, section):
    """Read metadata of simBEV run

    Parameters
    ----------
    scenario_name : str
        Scenario name
    section : str
        Metadata section to be returned, one of
        * "tech_data"
        * "charge_prob_slow"
        * "charge_prob_fast"

    Returns
    -------
    pd.DataFrame
        Config data
    """
    trips_cfg = DATASET_CFG["original_data"]["sources"]["trips"]
    meta_file = DATA_BUNDLE_DIR / Path(
        "mit_trip_data",
        trips_cfg[scenario_name]["file"].split(".")[0],
        trips_cfg[scenario_name]["file_metadata"],
    )
    with open(meta_file) as f:
        meta = json.loads(f.read())
    return pd.DataFrame.from_dict(meta.get(section, dict()), orient="index")


def reduce_mem_usage(
    df: pd.DataFrame, show_reduction: bool = False
) -> pd.DataFrame:
    """Function to automatically check if columns of a pandas DataFrame can
    be reduced to a smaller data type. Source:
    https://www.mikulskibartosz.name/how-to-reduce-memory-usage-in-pandas/

    Parameters
    ----------
    df: pd.DataFrame
        DataFrame to reduce memory usage on
    show_reduction : bool
        If True, print amount of memory reduced

    Returns
    -------
    pd.DataFrame
        DataFrame with memory usage decreased
    """
    start_mem = df.memory_usage().sum() / 1024 ** 2

    for col in df.columns:
        col_type = df[col].dtype

        if col_type != object and str(col_type) != "category":
            c_min = df[col].min()
            c_max = df[col].max()

            if str(col_type)[:3] == "int":
                if (
                    c_min > np.iinfo(np.int16).min
                    and c_max < np.iinfo(np.int16).max
                ):
                    df[col] = df[col].astype("int16")
                elif (
                    c_min > np.iinfo(np.int32).min
                    and c_max < np.iinfo(np.int32).max
                ):
                    df[col] = df[col].astype("int32")
                else:
                    df[col] = df[col].astype("int64")
            else:
                if (
                    c_min > np.finfo(np.float32).min
                    and c_max < np.finfo(np.float32).max
                ):
                    df[col] = df[col].astype("float32")
                else:
                    df[col] = df[col].astype("float64")

        else:
            df[col] = df[col].astype("category")

    end_mem = df.memory_usage().sum() / 1024 ** 2

    if show_reduction is True:
        print(
            "Reduced memory usage of DataFrame by "
            f"{(1 - end_mem/start_mem) * 100:.2f} %."
        )

    return df
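For illustration, a minimal usage sketch of reduce_mem_usage() (the DataFrame below is made up; it assumes the function has been imported from motorized_individual_travel.helpers):

import numpy as np
import pandas as pd

# assumes: reduce_mem_usage from motorized_individual_travel.helpers is in scope

frame = pd.DataFrame(
    {
        "park_start": np.arange(1000, dtype="int64"),  # range fits into int16
        "soc_start": np.random.rand(1000),             # range fits into float32
        "use_case": ["home", "work"] * 500,            # object -> category
    }
)
frame = reduce_mem_usage(frame, show_reduction=True)
print(frame.dtypes)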