1 | # Copyright (c) 2020--2022 Stefan Bender |
||
2 | # |
||
3 | # This module is part of pyspaceweather. |
||
4 | # pyspaceweather is free software: you can redistribute it or modify |
||
5 | # it under the terms of the GNU General Public License as published |
||
6 | # by the Free Software Foundation, version 2. |
||
7 | # See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html. |
||
8 | """Python interface for space weather indices |
||
9 | |||
10 | Celestrak space weather indices file parser for python [#]_. |
||
11 | |||
12 | .. [#] https://celestrak.com/SpaceData/ |
||
13 | """ |
||
14 | import os |
||
15 | from pkg_resources import resource_filename |
||
16 | import logging |
||
17 | from warnings import warn |
||
18 | |||
19 | import numpy as np |
||
20 | import pandas as pd |
||
21 | |||
22 | from .core import _assert_file_exists, _dl_file |
||
23 | |||
24 | __all__ = [ |
||
25 | "sw_daily", "ap_kp_3h", "read_sw", |
||
26 | "get_file_age", "update_data", |
||
27 | "SW_PATH_ALL", "SW_PATH_5Y", |
||
28 | ] |
||
29 | |||
30 | DL_URL_ALL = "https://celestrak.com/SpaceData/SW-All.txt" |
||
31 | DL_URL_5Y = "https://celestrak.com/SpaceData/SW-Last5Years.txt" |
||
32 | SW_FILE_ALL = os.path.basename(DL_URL_ALL) |
||
33 | SW_FILE_5Y = os.path.basename(DL_URL_5Y) |
||
34 | SW_PATH_ALL = resource_filename(__name__, os.path.join("data", SW_FILE_ALL)) |
||
35 | SW_PATH_5Y = resource_filename(__name__, os.path.join("data", SW_FILE_5Y)) |
||
36 | |||
37 | |||
38 | def get_file_age(swpath, relative=True): |
||
39 | """Age of the downloaded data file |
||
40 | |||
41 | Retrieves the last update time of the given file or full path. |
||
42 | |||
43 | Parameters |
||
44 | ---------- |
||
45 | swpath: str |
||
46 | Filename to check, absolute path or relative to the current dir. |
||
47 | relative: bool, optional, default True |
||
48 | Return the file's age (True) or the last update time (False). |
||
49 | |||
50 | Returns |
||
51 | ------- |
||
52 | upd: pandas.Timestamp or pandas.Timedelta |
||
53 | The last updated time or the file age, depending on the setting |
||
54 | of `relative` above. |
||
55 | Raises ``IOError`` if the file is not found. |
||
56 | """ |
||
57 | _assert_file_exists(swpath) |
||
58 | with open(swpath) as fp: |
||
59 | for line in fp: |
||
60 | if line.startswith("UPDATED"): |
||
61 | # closes the file automatically |
||
62 | break |
||
63 | upd = pd.to_datetime(line.lstrip("UPDATED"), utc=True) |
||
0 ignored issues
–
show
introduced
by
![]() |
|||
64 | if relative: |
||
65 | return pd.Timestamp.utcnow() - upd |
||
66 | return upd |
||
67 | |||
68 | |||
69 | def update_data( |
||
70 | min_age="3h", |
||
71 | swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, |
||
72 | url_all=DL_URL_ALL, url_5y=DL_URL_5Y, |
||
73 | ): |
||
74 | """Update the local space weather index data |
||
75 | |||
76 | Updates the local space weather index data from the website [#]_, |
||
77 | given that the 5-year file is older |
||
78 | than `min_age`, or the combined (large) file is older than four years. |
||
79 | If the data is missing for some reason, a download will be attempted nonetheless. |
||
80 | |||
81 | All arguments are optional and changing them from the defaults should not |
||
82 | be required neither should it be necessary nor is it recommended. |
||
83 | |||
84 | .. [#] https://celestrak.com/SpaceData/ |
||
85 | |||
86 | Parameters |
||
87 | ---------- |
||
88 | min_age: str, optional, default "3h" |
||
89 | The time after which a new download will be attempted. |
||
90 | The online data is updated every 3 hours, thus setting this value to |
||
91 | a shorter time is not needed and not recommended. |
||
92 | swpath_all: str, optional, default depending on package install location |
||
93 | Filename for the large combined index file including the |
||
94 | historic data, absolute path or relative to the current dir. |
||
95 | swpath_5y: str, optional, default depending on package install location |
||
96 | Filename for the 5-year index file, absolute path or relative to the current dir. |
||
97 | url_all: str, optional, default `sw.DL_URL_ALL` |
||
98 | The url of the "historic" data file. |
||
99 | url_5y: str, optional, default `sw.DL_URL_5Y` |
||
100 | The url of the data file of containing the indices of the last 5 years. |
||
101 | |||
102 | Returns |
||
103 | ------- |
||
104 | Nothing. |
||
105 | """ |
||
106 | def _update_file(swpath, url, min_age): |
||
107 | if not os.path.exists(swpath): |
||
108 | logging.info("{0} not found, downloading.".format(swpath)) |
||
109 | _dl_file(swpath, url) |
||
110 | return |
||
111 | if get_file_age(swpath) < pd.Timedelta(min_age): |
||
112 | logging.info("not updating '{0}'.".format(swpath)) |
||
113 | return |
||
114 | logging.info("updating '{0}'.".format(swpath)) |
||
115 | _dl_file(swpath, url) |
||
116 | |||
117 | # Update the large file after four years |
||
118 | # to have some overlap with the 5-year data |
||
119 | # 1460 = 4 * 365 |
||
120 | _update_file(swpath_all, url_all, "1460days") |
||
121 | # Don't re-download before `min_age` has passed (3h) |
||
122 | _update_file(swpath_5y, url_5y, min_age) |
||
123 | |||
124 | |||
125 | def read_sw(swpath): |
||
126 | """Read and parse space weather index data file |
||
127 | |||
128 | Reads the given file and parses it according to the space weather data format. |
||
129 | |||
130 | Parameters |
||
131 | ---------- |
||
132 | swpath: str |
||
133 | File to parse, absolute path or relative to the current dir. |
||
134 | |||
135 | Returns |
||
136 | ------- |
||
137 | sw_df: pandas.DataFrame |
||
138 | The parsed space weather data (daily values). |
||
139 | Raises an ``IOError`` if the file is not found. |
||
140 | |||
141 | The dataframe contains the following columns: |
||
142 | |||
143 | "year", "month", "day": |
||
144 | The observation date |
||
145 | "bsrn": |
||
146 | Bartels Solar Rotation Number. |
||
147 | A sequence of 27-day intervals counted continuously from 1832 Feb 8. |
||
148 | "rotd": |
||
149 | Number of Day within the Bartels 27-day cycle (01-27). |
||
150 | "Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21": |
||
151 | Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600, |
||
152 | 0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT |
||
153 | "Kpsum": Sum of the 8 Kp indices for the day. |
||
154 | Expressed to the nearest third of a unit. |
||
155 | "Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21": |
||
156 | Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600, |
||
157 | 0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT |
||
158 | "Apavg": |
||
159 | Arithmetic average of the 8 Ap indices for the day. |
||
160 | "Cp": |
||
161 | Cp or Planetary Daily Character Figure. A qualitative estimate of |
||
162 | overall level of magnetic activity for the day determined from the sum |
||
163 | of the 8 Ap indices. Cp ranges, in steps of one-tenth, from 0 (quiet) |
||
164 | to 2.5 (highly disturbed). "C9": |
||
165 | "isn": |
||
166 | International Sunspot Number. |
||
167 | Records contain the Zurich number through 1980 Dec 31 and the |
||
168 | International Brussels number thereafter. |
||
169 | "f107_adj": |
||
170 | 10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU. |
||
171 | Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until |
||
172 | 1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on. |
||
173 | Expressed in units of 10-22 W/m2/Hz. |
||
174 | "Q": |
||
175 | Flux Qualifier. |
||
176 | 0 indicates flux required no adjustment; |
||
177 | 1 indicates flux required adjustment for burst in progress at time of measurement; |
||
178 | 2 indicates a flux approximated by either interpolation or extrapolation; |
||
179 | 3 indicates no observation; and |
||
180 | 4 indicates CSSI interpolation of missing data. |
||
181 | "f107_81ctr_adj": |
||
182 | Centered 81-day arithmetic average of F10.7 (adjusted). |
||
183 | "f107_81lst_adj": |
||
184 | Last 81-day arithmetic average of F10.7 (adjusted). |
||
185 | "f107_obs": |
||
186 | Observed (unadjusted) value of F10.7. |
||
187 | "f107_81ctr_obs": |
||
188 | Centered 81-day arithmetic average of F10.7 (observed). |
||
189 | "f107_81lst_obs": |
||
190 | Last 81-day arithmetic average of F10.7 (observed). |
||
191 | """ |
||
192 | _assert_file_exists(swpath) |
||
193 | sw = np.genfromtxt( |
||
194 | swpath, |
||
195 | skip_header=3, |
||
196 | delimiter=[ |
||
197 | # yy mm dd br rd kp kp kp kp kp kp kp kp Kp |
||
198 | 4, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, |
||
199 | # ap ap ap ap ap ap ap ap Ap cp c9 is f1 q |
||
200 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 6, 2, |
||
201 | # f2 f3 f4 f5 f6 |
||
202 | 6, 6, 6, 6, 6], |
||
203 | dtype=( |
||
204 | "i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4," |
||
205 | "i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,i4,i4,f8,i4," |
||
206 | "f8,f8,f8,f8,f8" |
||
207 | ), |
||
208 | names=[ |
||
209 | "year", "month", "day", "bsrn", "rotd", |
||
210 | "Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21", "Kpsum", |
||
211 | "Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg", |
||
212 | "Cp", "C9", "isn", "f107_adj", "Q", "f107_81ctr_adj", "f107_81lst_adj", |
||
213 | "f107_obs", "f107_81ctr_obs", "f107_81lst_obs" |
||
214 | ] |
||
215 | )[2:-1] |
||
216 | sw = sw[sw["year"] != -1] |
||
217 | ts = pd.to_datetime([ |
||
218 | "{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd) |
||
219 | for yy, mm, dd in sw[["year", "month", "day"]] |
||
220 | ]) |
||
221 | sw_df = pd.DataFrame(sw, index=ts) |
||
222 | # Adjust Kp to 0...9 |
||
223 | kpns = list(map("Kp{0}".format, range(0, 23, 3))) + ["Kpsum"] |
||
224 | sw_df[kpns] = 0.1 * sw_df[kpns] |
||
225 | return sw_df |
||
226 | |||
227 | |||
228 | # Common arguments for the public daily and 3h interfaces |
||
229 | _SW_COMMON_PARAMS = """ |
||
230 | Parameters |
||
231 | ---------- |
||
232 | swpath_all: str, optional, default depending on package install location |
||
233 | Filename for the large combined index file including the |
||
234 | historic data, absolute path or relative to the current dir. |
||
235 | swpath_5y: str, optional, default depending on package install location |
||
236 | Filename for the 5-year index file, absolute path or relative to the current dir. |
||
237 | update: bool, optional, default False |
||
238 | Attempt to update the local data if it is older than `update_interval`. |
||
239 | update_interval: str, optional, default "30days" |
||
240 | The time after which the data are considered "old". |
||
241 | By default, no automatic re-download is initiated, set `update` to true. |
||
242 | The online data is updated every 3 hours, thus setting this value to |
||
243 | a shorter time is not needed and not recommended. |
||
244 | """ |
||
245 | |||
246 | |||
247 | def _doc_param(**sub): |
||
248 | def dec(obj): |
||
249 | obj.__doc__ = obj.__doc__.format(**sub) |
||
250 | return obj |
||
251 | return dec |
||
252 | |||
253 | |||
254 | @_doc_param(params=_SW_COMMON_PARAMS) |
||
255 | def sw_daily(swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, update=False, update_interval="30days"): |
||
256 | """Combined daily Ap, Kp, and f10.7 index values |
||
257 | |||
258 | Combines the "historic" and last-5-year data into one dataframe. |
||
259 | |||
260 | All arguments are optional and changing them from the defaults should not |
||
261 | be required neither should it be necessary nor is it recommended. |
||
262 | {params} |
||
263 | Returns |
||
264 | ------- |
||
265 | sw_df: pandas.DataFrame |
||
266 | The combined parsed space weather data (daily values). |
||
267 | Raises ``IOError`` if the data files cannot be found. |
||
268 | |||
269 | See Also |
||
270 | -------- |
||
271 | ap_kp_3h, read_sw |
||
272 | """ |
||
273 | # ensure that the file exists and is up to date |
||
274 | if ( |
||
275 | not os.path.exists(swpath_all) |
||
276 | or not os.path.exists(swpath_5y) |
||
277 | ): |
||
278 | warn("Could not find space weather data, trying to download.") |
||
279 | update_data(swpath_all=swpath_all, swpath_5y=swpath_5y) |
||
280 | |||
281 | if ( |
||
282 | # 1460 = 4 * 365 |
||
283 | get_file_age(swpath_all) > pd.Timedelta("1460days") |
||
284 | or get_file_age(swpath_5y) > pd.Timedelta(update_interval) |
||
285 | ): |
||
286 | if update: |
||
287 | update_data(swpath_all=swpath_all, swpath_5y=swpath_5y) |
||
288 | else: |
||
289 | warn( |
||
290 | "Local data files are older than {0}, pass `update=True` or " |
||
291 | "run `sw.update_data()` manually if you need newer data.".format( |
||
292 | update_interval |
||
293 | ) |
||
294 | ) |
||
295 | |||
296 | df_all = read_sw(swpath_all) |
||
297 | df_5y = read_sw(swpath_5y) |
||
298 | return pd.concat([df_all[:df_5y.index[0]], df_5y[1:]]) |
||
299 | |||
300 | |||
301 | @_doc_param(params=_SW_COMMON_PARAMS) |
||
302 | def ap_kp_3h(*args, **kwargs): |
||
303 | """3h values of Ap and Kp |
||
304 | |||
305 | Provides the 3-hourly Ap and Kp indices from the full daily data set. |
||
306 | |||
307 | Accepts the same arguments as `sw_daily()`. |
||
308 | All arguments are optional and changing them from the defaults should not |
||
309 | be required neither should it be necessary nor is it recommended. |
||
310 | {params} |
||
311 | Returns |
||
312 | ------- |
||
313 | sw_df: pandas.DataFrame |
||
314 | The combined Ap and Kp index data (3h values). |
||
315 | The index values are centred at the 3h interval, i.e. at 01:30:00, |
||
316 | 04:30:00, 07:30:00, ... and so on. |
||
317 | Raises ``IOError`` if the data files cannot be found. |
||
318 | |||
319 | See Also |
||
320 | -------- |
||
321 | sw_daily |
||
322 | """ |
||
323 | daily_df = sw_daily(*args, **kwargs) |
||
324 | ret = daily_df.copy() |
||
325 | apns = list(map("Ap{0}".format, range(0, 23, 3))) |
||
326 | kpns = list(map("Kp{0}".format, range(0, 23, 3))) |
||
327 | for i, (ap, kp) in enumerate(zip(apns, kpns)): |
||
328 | ret[ap].index = daily_df[ap].index + pd.Timedelta((i * 3 + 1.5), unit="h") |
||
329 | ret[kp].index = daily_df[kp].index + pd.Timedelta((i * 3 + 1.5), unit="h") |
||
330 | sw_ap = pd.concat(map(ret.__getitem__, apns)) |
||
331 | sw_kp = pd.concat(map(ret.__getitem__, kpns)) |
||
332 | df = pd.DataFrame({"Ap": sw_ap, "Kp": sw_kp}) |
||
333 | return df.reindex(df.index.sort_values()) |
||
334 |