1 | # Copyright (c) 2020--2024 Stefan Bender |
||
2 | # |
||
3 | # This module is part of pyspaceweather. |
||
4 | # pyspaceweather is free software: you can redistribute it or modify |
||
5 | # it under the terms of the GNU General Public License as published |
||
6 | # by the Free Software Foundation, version 2. |
||
7 | # See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html. |
||
8 | """Python interface for space weather indices from GFZ Potsdam |
||
9 | |||
10 | GFZ space weather indices ASCII file parser for python [#]_. |
||
11 | |||
12 | .. [#] https://kp.gfz-potsdam.de/en/ |
||
13 | """ |
||
14 | import os |
||
15 | from pkg_resources import resource_filename |
||
16 | import logging |
||
17 | from warnings import warn |
||
18 | |||
19 | import numpy as np |
||
20 | import pandas as pd |
||
21 | |||
22 | from .core import _assert_file_exists, _dl_file |
||
23 | |||
24 | __all__ = [ |
||
25 | "gfz_daily", "gfz_3h", "read_gfz", |
||
26 | "get_gfz_age", "update_gfz", |
||
27 | "GFZ_PATH_ALL", "GFZ_PATH_30D", |
||
28 | ] |
||
29 | |||
30 | GFZ_URL_ALL = "https://kp.gfz-potsdam.de/app/files/Kp_ap_Ap_SN_F107_since_1932.txt" |
||
31 | GFZ_URL_30D = "https://kp.gfz-potsdam.de/app/files/Kp_ap_Ap_SN_F107_nowcast.txt" |
||
32 | GFZ_FILE_ALL = os.path.basename(GFZ_URL_ALL) |
||
33 | GFZ_FILE_30D = os.path.basename(GFZ_URL_30D) |
||
34 | GFZ_PATH_ALL = resource_filename(__name__, os.path.join("data", GFZ_FILE_ALL)) |
||
35 | GFZ_PATH_30D = resource_filename(__name__, os.path.join("data", GFZ_FILE_30D)) |
||
36 | |||
37 | |||
38 | def get_gfz_age(gfzpath, relative=True): |
||
39 | """Age of the downloaded data file |
||
40 | |||
41 | Retrieves the last update time of the given file or full path. |
||
42 | |||
43 | Parameters |
||
44 | ---------- |
||
45 | gfzpath: str |
||
46 | Filename to check, absolute path or relative to the current dir. |
||
47 | relative: bool, optional, default True |
||
48 | Return the file's age (True) or the last update time (False). |
||
49 | |||
50 | Returns |
||
51 | ------- |
||
52 | upd: pandas.Timestamp or pandas.Timedelta |
||
53 | The last updated time or the file age, depending on the setting |
||
54 | of `relative` above. |
||
55 | Raises ``IOError`` if the file is not found. |
||
56 | """ |
||
57 | _assert_file_exists(gfzpath) |
||
58 | with open(gfzpath) as fp: |
||
59 | for line in fp: |
||
60 | # forward to last line |
||
61 | pass |
||
62 | upd = pd.to_datetime(line[:10].replace(" ", "-"), utc=True) |
||
0 ignored issues
–
show
introduced
by
![]() |
|||
63 | if relative: |
||
64 | return pd.Timestamp.utcnow() - upd |
||
65 | return upd |
||
66 | |||
67 | |||
68 | def update_gfz( |
||
69 | min_age="1d", |
||
70 | gfzpath_all=GFZ_PATH_ALL, gfzpath_30d=GFZ_PATH_30D, |
||
71 | url_all=GFZ_URL_ALL, url_30d=GFZ_URL_30D, |
||
72 | ): |
||
73 | """Update the local space weather index data |
||
74 | |||
75 | Updates the local space weather index data from the website [#]_, |
||
76 | given that the 30-day file is older than `min_age`, |
||
77 | or the combined (large) file is older than 30 days. |
||
78 | If the data is missing for some reason, a download will be attempted nonetheless. |
||
79 | |||
80 | All arguments are optional and changing them from the defaults should |
||
81 | neither be necessary nor is it recommended. |
||
82 | |||
83 | .. [#] https://kp.gfz-potsdam.de/en/ |
||
84 | |||
85 | Parameters |
||
86 | ---------- |
||
87 | min_age: str, optional, default "1d" |
||
88 | The time after which a new download will be attempted. |
||
89 | The online data is updated every day, thus setting this value to |
||
90 | a shorter time is not needed and not recommended. |
||
91 | gfzpath_all: str, optional, default depending on package install location |
||
92 | Filename for the large combined index file including the |
||
93 | historic data, absolute path or relative to the current dir. |
||
94 | gfzpath_30d: str, optional, default depending on package install location |
||
95 | Filename for the 30-day (nowcast) index file, absolute path or relative |
||
96 | to the current dir. |
||
97 | url_all: str, optional, default `gfz.GFZ_URL_ALL` |
||
98 | The url of the "historic" data file. |
||
99 | url_30d: str, optional, default `gfz.GFZ_URL_30D` |
||
100 | The url of the data file containing the indices for the last 30 days. |
||
101 | |||
102 | Returns |
||
103 | ------- |
||
104 | Nothing. |
||
105 | """ |
||
106 | def _update_file(gfzpath, url, min_age): |
||
107 | if not os.path.exists(gfzpath): |
||
108 | logging.info("{0} not found, downloading.".format(gfzpath)) |
||
109 | _dl_file(gfzpath, url) |
||
110 | return |
||
111 | if get_gfz_age(gfzpath) < pd.Timedelta(min_age): |
||
112 | logging.info("not updating '{0}'.".format(gfzpath)) |
||
113 | return |
||
114 | logging.info("updating '{0}'.".format(gfzpath)) |
||
115 | _dl_file(gfzpath, url) |
||
116 | |||
117 | # Update the large file after 30 days |
||
118 | _update_file(gfzpath_all, url_all, "30days") |
||
119 | # Don't re-download before `min_age` has passed (1d) |
||
120 | _update_file(gfzpath_30d, url_30d, min_age) |
||
121 | |||
122 | |||
123 | def read_gfz(gfzpath): |
||
124 | """Read and parse space weather index data file |
||
125 | |||
126 | Reads the given file and parses it according to the space weather data format. |
||
127 | |||
128 | Parameters |
||
129 | ---------- |
||
130 | gfzpath: str |
||
131 | File to parse, absolute path or relative to the current dir. |
||
132 | |||
133 | Returns |
||
134 | ------- |
||
135 | gfz_df: pandas.DataFrame |
||
136 | The parsed space weather data (daily values). |
||
137 | Raises an ``IOError`` if the file is not found. |
||
138 | |||
139 | The dataframe contains the following columns: |
||
140 | |||
141 | "year", "month", "day": |
||
142 | The observation date |
||
143 | "bsrn": |
||
144 | Bartels Solar Rotation Number. |
||
145 | A sequence of 27-day intervals counted continuously from 1832 Feb 8. |
||
146 | "rotd": |
||
147 | Number of Day within the Bartels 27-day cycle (01-27). |
||
148 | "Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21": |
||
149 | Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600, |
||
150 | 0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT |
||
151 | "Kpsum": Sum of the 8 Kp indices for the day. |
||
152 | Expressed to the nearest third of a unit. |
||
153 | "Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21": |
||
154 | Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600, |
||
155 | 0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT |
||
156 | "Apavg": |
||
157 | Arithmetic average of the 8 Ap indices for the day. |
||
158 | "isn": |
||
159 | International Sunspot Number. |
||
160 | Records contain the Zurich number through 1980 Dec 31 and the |
||
161 | International Brussels number thereafter. |
||
162 | "f107_obs": |
||
163 | Observed (unadjusted) value of F10.7. |
||
164 | "f107_adj": |
||
165 | 10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU. |
||
166 | Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until |
||
167 | 1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on. |
||
168 | Expressed in units of 10-22 W/m2/Hz. |
||
169 | "D": |
||
170 | Definitive indicator. |
||
171 | 0: Kp and SN preliminary |
||
172 | 1: Kp definitive, SN preliminary |
||
173 | 2: Kp and SN definitive |
||
174 | """ |
||
175 | _assert_file_exists(gfzpath) |
||
176 | gfz = np.genfromtxt( |
||
177 | gfzpath, |
||
178 | skip_header=3, |
||
179 | delimiter=[ |
||
180 | # yy mm dd dd dm br db kp kp kp kp kp kp kp kp |
||
181 | 4, 3, 3, 6, 8, 5, 3, 7, 7, 7, 7, 7, 7, 7, 7, |
||
182 | # ap ap ap ap ap ap ap ap Ap sn f1 f2 def |
||
183 | 5, 5, 5, 5, 5, 5, 5, 5, 6, 4, 9, 9, 2, |
||
184 | ], |
||
185 | dtype=( |
||
186 | "i4,i4,i4,i4,f4,i4,i4,f4,f4,f4,f4,f4,f4,f4," |
||
187 | "f4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,f8,i4," |
||
188 | ), |
||
189 | names=[ |
||
190 | "year", "month", "day", "days", "days_m", "bsrn", "rotd", |
||
191 | "Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21", |
||
192 | "Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg", |
||
193 | "isn", "f107_obs", "f107_adj", "D", |
||
194 | ] |
||
195 | ) |
||
196 | gfz = gfz[gfz["year"] != -1] |
||
197 | ts = pd.to_datetime([ |
||
198 | "{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd) |
||
199 | for yy, mm, dd in gfz[["year", "month", "day"]] |
||
200 | ]) |
||
201 | gfz_df = pd.DataFrame(gfz, index=ts) |
||
202 | # Sum Kp for compatibility with celestrak dataframe |
||
203 | kpns = list(map("Kp{0}".format, range(0, 23, 3))) |
||
204 | gfz_df.insert(15, "Kpsum", gfz_df[kpns].sum(axis=1)) |
||
205 | return gfz_df |
||
206 | |||
207 | |||
208 | def read_gfz_wdc(gfzpath): |
||
209 | """Parse space weather index data file in WDC format |
||
210 | |||
211 | Parses the GFZ index data in WDC format. |
||
212 | |||
213 | Parameters |
||
214 | ---------- |
||
215 | gfzpath: str |
||
216 | File to parse, absolute path or relative to the current dir. |
||
217 | |||
218 | Returns |
||
219 | ------- |
||
220 | gfz_df: pandas.DataFrame |
||
221 | The parsed space weather data (daily values). |
||
222 | Raises an ``IOError`` if the file is not found. |
||
223 | |||
224 | The dataframe contains the following columns: |
||
225 | |||
226 | "year", "month", "day": |
||
227 | The observation date |
||
228 | "bsrn": |
||
229 | Bartels Solar Rotation Number. |
||
230 | A sequence of 27-day intervals counted continuously from 1832 Feb 8. |
||
231 | "rotd": |
||
232 | Number of Day within the Bartels 27-day cycle (01-27). |
||
233 | "Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21": |
||
234 | Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600, |
||
235 | 0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT |
||
236 | "Kpsum": Sum of the 8 Kp indices for the day. |
||
237 | Expressed to the nearest third of a unit. |
||
238 | "Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21": |
||
239 | Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600, |
||
240 | 0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT |
||
241 | "Apavg": |
||
242 | Arithmetic average of the 8 Ap indices for the day. |
||
243 | "Cp": |
||
244 | Cp index - the daily planetary character figure, a qualitative |
||
245 | estimate of the overall level of geomagnetic activity for this day |
||
246 | determined from the sum of the eight ap amplitudes, |
||
247 | ranging from 0.0 to 2.5 in steps of 0.1. |
||
248 | "C9": |
||
249 | The contracted scale for Cp with only 1 digit, from 0 to 9. |
||
250 | """ |
||
251 | _assert_file_exists(gfzpath) |
||
252 | gfz = np.genfromtxt( |
||
253 | gfzpath, |
||
254 | skip_header=3, |
||
255 | delimiter=[ |
||
256 | # yy mm dd br db kp kp kp kp kp kp kp kp kps |
||
257 | 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, |
||
258 | # ap ap ap ap ap ap ap ap Ap Cp C9 |
||
259 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, |
||
260 | ], |
||
261 | dtype=( |
||
262 | "i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4," |
||
263 | "i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,i4," |
||
264 | ), |
||
265 | names=[ |
||
266 | "year", "month", "day", "bsrn", "rotd", |
||
267 | "Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21", "Kpsum", |
||
268 | "Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg", |
||
269 | "Cp", "C9", |
||
270 | ] |
||
271 | ) |
||
272 | gfz = gfz[gfz["year"] != -1] |
||
273 | ts = pd.to_datetime([ |
||
274 | "{0:04d}-{1:02d}-{2:02d}".format(2000 + yy if yy < 32 else 1900 + yy, mm, dd) |
||
275 | for yy, mm, dd in gfz[["year", "month", "day"]] |
||
276 | ]) |
||
277 | gfz_df = pd.DataFrame(gfz, index=ts) |
||
278 | gfz_df.loc[:, "year"] = ts.year |
||
279 | # Adjust Kp to 0...9 |
||
280 | kpns = list(map("Kp{0}".format, range(0, 23, 3))) + ["Kpsum"] |
||
281 | gfz_df[kpns] = 0.1 * gfz_df[kpns] |
||
282 | return gfz_df |
||
283 | |||
284 | |||
285 | # Common arguments for the public daily and 3h interfaces |
||
286 | _GFZ_COMMON_PARAMS = """ |
||
287 | Parameters |
||
288 | ---------- |
||
289 | gfzpath_all: str, optional, default depending on package install location |
||
290 | Filename for the large combined index file including the |
||
291 | historic data, absolute path or relative to the current dir. |
||
292 | gfzpath_30d: str, optional, default depending on package install location |
||
293 | Filename for the 30-day (nowcast) index file, |
||
294 | absolute path or relative to the current dir. |
||
295 | update: bool, optional, default False |
||
296 | Attempt to update the local data if it is older than `update_interval`. |
||
297 | update_interval: str, optional, default "30days" |
||
298 | The time after which the data are considered "old". |
||
299 | By default, no automatic re-download is initiated, set `update` to true. |
||
300 | The online data is updated every 3 hours, thus setting this value to |
||
301 | a shorter time is not needed and not recommended. |
||
302 | """ |
||
303 | |||
304 | |||
305 | def _doc_param(**sub): |
||
306 | def dec(obj): |
||
307 | obj.__doc__ = obj.__doc__.format(**sub) |
||
308 | return obj |
||
309 | return dec |
||
310 | |||
311 | |||
312 | @_doc_param(params=_GFZ_COMMON_PARAMS) |
||
313 | def gfz_daily(gfzpath_all=GFZ_PATH_ALL, gfzpath_30d=GFZ_PATH_30D, update=False, update_interval="10days"): |
||
314 | """Combined daily Ap, Kp, and f10.7 index values |
||
315 | |||
316 | Combines the "historic" and last-30-day data into one dataframe. |
||
317 | |||
318 | All arguments are optional and changing them from the defaults should not |
||
319 | be required neither should it be necessary nor is it recommended. |
||
320 | {params} |
||
321 | Returns |
||
322 | ------- |
||
323 | gfz_df: pandas.DataFrame |
||
324 | The combined parsed space weather data (daily values). |
||
325 | Raises ``IOError`` if the data files cannot be found. |
||
326 | |||
327 | See Also |
||
328 | -------- |
||
329 | gfz_3h, read_gfz |
||
330 | """ |
||
331 | # ensure that the file exists and is up to date |
||
332 | if ( |
||
333 | not os.path.exists(gfzpath_all) |
||
334 | or not os.path.exists(gfzpath_30d) |
||
335 | ): |
||
336 | warn("Could not find space weather data, trying to download.") |
||
337 | update_gfz(gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d) |
||
338 | |||
339 | if ( |
||
340 | get_gfz_age(gfzpath_all) > pd.Timedelta("30days") |
||
341 | or get_gfz_age(gfzpath_30d) > pd.Timedelta(update_interval) |
||
342 | ): |
||
343 | if update: |
||
344 | update_gfz(gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d) |
||
345 | else: |
||
346 | warn( |
||
347 | "Local data files are older than {0}, pass `update=True` or " |
||
348 | "run `gfz.update_gfz()` manually if you need newer data.".format( |
||
349 | update_interval |
||
350 | ) |
||
351 | ) |
||
352 | |||
353 | df_all = read_gfz(gfzpath_all) |
||
354 | df_30d = read_gfz(gfzpath_30d) |
||
355 | return pd.concat([df_all[:df_30d.index[0]], df_30d[1:]]) |
||
356 | |||
357 | |||
358 | @_doc_param(params=_GFZ_COMMON_PARAMS) |
||
359 | def gfz_3h(*args, **kwargs): |
||
360 | """3h values of Ap and Kp |
||
361 | |||
362 | Provides the 3-hourly Ap and Kp indices from the full daily data set. |
||
363 | |||
364 | Accepts the same arguments as `gfz_daily()`. |
||
365 | All arguments are optional and changing them from the defaults should not |
||
366 | be required neither should it be necessary nor is it recommended. |
||
367 | {params} |
||
368 | Returns |
||
369 | ------- |
||
370 | gfz_df: pandas.DataFrame |
||
371 | The combined Ap and Kp index data (3h values). |
||
372 | The index values are centred at the 3h interval, i.e. at 01:30:00, |
||
373 | 04:30:00, 07:30:00, ... and so on. |
||
374 | Raises ``IOError`` if the data files cannot be found. |
||
375 | |||
376 | See Also |
||
377 | -------- |
||
378 | gfz_daily |
||
379 | """ |
||
380 | daily_df = gfz_daily(*args, **kwargs) |
||
381 | ret = daily_df.copy() |
||
382 | apns = list(map("Ap{0}".format, range(0, 23, 3))) |
||
383 | kpns = list(map("Kp{0}".format, range(0, 23, 3))) |
||
384 | for i, (ap, kp) in enumerate(zip(apns, kpns)): |
||
385 | ret[ap].index = daily_df[ap].index + pd.Timedelta((i * 3 + 1.5), unit="h") |
||
386 | ret[kp].index = daily_df[kp].index + pd.Timedelta((i * 3 + 1.5), unit="h") |
||
387 | gfz_ap = pd.concat(map(ret.__getitem__, apns)) |
||
388 | gfz_kp = pd.concat(map(ret.__getitem__, kpns)) |
||
389 | df = pd.DataFrame({"Ap": gfz_ap, "Kp": gfz_kp}) |
||
390 | return df.reindex(df.index.sort_values()) |
||
391 |