| Conditions | 2 |
| Total Lines | 183 |
| Code Lines | 99 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
| 1 | """ |
||
def _add_voltage_level_to_locations(loc, gcp):
    """Join grid connection points onto locations and shorten voltage labels.

    Parameters
    ----------
    loc : pandas.DataFrame
        Extended locations table; must contain ``Netzanschlusspunkte``.
    gcp : pandas.DataFrame
        Grid connection points table; must contain
        ``NetzanschlusspunktMastrNummer``.

    Returns
    -------
    pandas.DataFrame
        Left join of ``loc`` with ``gcp`` in which the long
        ``Spannungsebene`` labels are replaced by their short identifiers.
    """
    loc_vlevel = loc.merge(
        gcp,
        left_on="Netzanschlusspunkte",
        right_on="NetzanschlusspunktMastrNummer",
        how="left",
    )
    # Only the four labels listed here are rewritten; every other value of
    # "Spannungsebene" passes through unchanged.
    return loc_vlevel.replace(
        {
            "Spannungsebene": {
                "Niederspannung (= Hausanschluss/Haushaltsstrom)": "Niederspannung",
                "Umspannebene Mittelspannung/Niederspannung": "UmspannungZurNiederspannung",
                "Umspannebene Hochspannung/Mittelspannung": "UmspannungZurMittelspannung",
                "Umspannebene Höchstspannung/Hochspannung": "UmspannungZurHochspannung",
            }
        }
    )


def _fix_solar_columns(solar):
    """Add the derived solar columns and fix the inverter column's casing.

    Parameters
    ----------
    solar : pandas.DataFrame
        Raw solar units; must contain ``Postleitzahl``, ``Ort`` and
        ``Bruttoleistung``. The three derived columns are added to this
        frame in place.

    Returns
    -------
    pandas.DataFrame
        The solar data with ``Standort``, ``Bruttoleistung_extended`` and
        ``InstallierteLeistung`` set and
        ``ZugeordneteWirkleistungWechselrichter`` renamed to start with a
        lower-case ``z``.
    """
    # "Standort" is "<postal code> <town>"; the postal code is stringified
    # element-wise so that it can be concatenated with the town name.
    solar["Standort"] = solar["Postleitzahl"].apply(str) + " " + solar["Ort"]
    solar["Bruttoleistung_extended"] = solar["Bruttoleistung"]
    solar["InstallierteLeistung"] = solar["Bruttoleistung"]

    return solar.rename(
        columns={
            "ZugeordneteWirkleistungWechselrichter": "zugeordneteWirkleistungWechselrichter"
        }
    )


def _write_mastr_csv(df, file_name):
    """Write ``df`` to ``WORKING_DIR_MASTR_NEW / file_name`` as UTF-8 CSV without index."""
    df.to_csv(
        WORKING_DIR_MASTR_NEW / file_name,
        index=False,
        encoding="UTF-8",
    )


def extract_and_preprocess_mastr():
    """
    Extract the downloaded MaStR dump and create cleaned, schema-aligned CSVs.

    This routine expects a MaStR ZIP archive (downloaded by
    :func:`download_mastr_data`) to be present in ``WORKING_DIR_MASTR_NEW``.
    The archive name is taken from the ``dump_name`` entry of the
    ``mastr_new`` dataset configuration. The function unpacks the archive,
    reads the *raw* CSV files shipped in the dump, applies a set of
    harmonization steps (column renaming, categorical normalization, data
    enrichments), and writes *cleaned* CSVs. The function performs the
    following steps:

    1) Extract ``<dump_name>.zip`` into ``WORKING_DIR_MASTR_NEW``
    2) Read raw CSVs from the extracted dump folder
       ``bnetza_mastr_wind_raw.csv``,
       ``bnetza_mastr_solar_raw.csv``,
       ``bnetza_mastr_biomass_raw.csv``,
       ``bnetza_mastr_hydro_raw.csv``,
       ``bnetza_mastr_gsgk_raw.csv``,
       ``bnetza_mastr_storage_raw.csv``,
       ``bnetza_mastr_combustion_raw.csv``,
       ``bnetza_mastr_nuclear_raw.csv``,
       ``bnetza_mastr_locations_extended_raw.csv``,
       ``bnetza_mastr_grid_connections_raw.csv``.
    3) Voltage-level enrichment for locations (left join with the grid
       connection points, written to ``location_elec_generation_raw.csv``)
    4) Solar-specific fixes
    5) Common harmonization across technologies (``MastrNummer`` column
       renamed to ``MaStRNummer``; ``Bundesland`` and
       ``EinheitBetriebsstatus`` values replaced by identifier-style names)
    6) Write cleaned outputs (UTF-8, no index) to ``WORKING_DIR_MASTR_NEW``
       - ``bnetza_mastr_wind_cleaned.csv``
       - ``bnetza_mastr_solar_cleaned.csv``
       - ``bnetza_mastr_biomass_cleaned.csv``
       - ``bnetza_mastr_hydro_cleaned.csv``
       - ``bnetza_mastr_gsgk_cleaned.csv``
       - ``bnetza_mastr_storage_cleaned.csv``
       - ``bnetza_mastr_combustion_cleaned.csv``
       - ``bnetza_mastr_nuclear_cleaned.csv``

    Returns
    -------
    None
        Results are written to disk as CSV files (see lists above).
    """
    # Extract the MaStR dump
    data_config = egon.data.config.datasets()["mastr_new"]
    dump_file_name = data_config["dump_name"]
    raw_data_path = WORKING_DIR_MASTR_NEW / dump_file_name

    with zipfile.ZipFile(
        WORKING_DIR_MASTR_NEW / (dump_file_name + ".zip"), "r"
    ) as zip_ref:
        zip_ref.extractall(WORKING_DIR_MASTR_NEW)

    # Read every raw file up front so that a missing or broken input fails
    # before any output file is written. The tuple order is also the order
    # in which the cleaned files are written.
    technologies = (
        "wind",
        "solar",
        "biomass",
        "hydro",
        "gsgk",
        "storage",
        "combustion",
        "nuclear",
    )
    units = {
        tech: pd.read_csv(raw_data_path / f"bnetza_mastr_{tech}_raw.csv")
        for tech in technologies
    }
    loc = pd.read_csv(
        raw_data_path / "bnetza_mastr_locations_extended_raw.csv"
    )
    gcp = pd.read_csv(raw_data_path / "bnetza_mastr_grid_connections_raw.csv")

    # Applied to the locations table and to every technology table.
    id_column_mapping = {"MastrNummer": "MaStRNummer"}

    # Locations and grid connection points
    _write_mastr_csv(
        _add_voltage_level_to_locations(loc, gcp).rename(
            columns=id_column_mapping
        ),
        "location_elec_generation_raw.csv",
    )

    # Fix solar
    units["solar"] = _fix_solar_columns(units["solar"])

    states_renaming = {
        "Thüringen": "Thueringen",
        "Schleswig-Holstein": "SchleswigHolstein",
        "Nordrhein-Westfalen": "NordrheinWestfalen",
        "Rheinland-Pfalz": "RheinlandPfalz",
        "Baden-Württemberg": "BadenWuerttemberg",
        "Sachsen-Anhalt": "SachsenAnhalt",
        "Mecklenburg-Vorpommern": "MecklenburgVorpommern",
        "Ausschließliche Wirtschaftszone": "AusschliesslicheWirtschaftszone",
    }
    status_renaming = {
        "In Betrieb": "InBetrieb",
        "Vorübergehend stillgelegt": "VoruebergehendStillgelegt",
        "Endgültig stillgelegt": "DauerhaftStillgelegt",
        "In Planung": "InPlanung",
    }
    values_renaming = {
        "Bundesland": states_renaming,
        "EinheitBetriebsstatus": status_renaming,
    }

    # Export data
    for tech, df in units.items():
        _write_mastr_csv(
            df.rename(columns=id_column_mapping).replace(values_renaming),
            f"bnetza_mastr_{tech}_cleaned.csv",
        )
||
| 295 |