1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
# Copyright (c) 2022 Stefan Bender |
3
|
|
|
# |
4
|
|
|
# This module is part of pyspaceweather. |
5
|
|
|
# pyspaceweather is free software: you can redistribute it or modify |
6
|
|
|
# it under the terms of the GNU General Public License as published |
7
|
|
|
# by the Free Software Foundation, version 2. |
8
|
|
|
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html. |
9
|
|
|
"""Python interface for OMNI space weather data |
10
|
|
|
|
11
|
|
|
Omni2 [#]_ space weather data interface for python. |
12
|
|
|
|
13
|
|
|
.. [#] https://omniweb.gsfc.nasa.gov/ow.html |
14
|
|
|
""" |
15
|
|
|
import os |
16
|
|
|
import logging |
17
|
|
|
from warnings import warn |
18
|
|
|
|
19
|
|
|
from posixpath import join as urljoin |
20
|
|
|
|
21
|
|
|
import numpy as np |
22
|
|
|
import pandas as pd |
23
|
|
|
|
24
|
|
|
from .core import _assert_file_exists, _dl_file, _resource_filepath |
25
|
|
|
|
26
|
|
|
# Public API of this module, i.e. the names exported by a star-import.
__all__ = [
    "cache_omnie",
    "omnie_hourly",
    "omnie_mask_missing",
    "read_omnie",
]
32
|
|
|
|
33
|
|
|
# Remote directory with the yearly files, the default for `url_base`
# in `cache_omnie()` and `omnie_hourly()`.
OMNI_URL_BASE = "https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended"
# Default file name prefix and extension; the yearly file names are built
# as "PREFIX_YYYY.EXT" from these.
OMNI_PREFIX, OMNI_EXT = "omni2", "dat"
# Sub-directory name handed to `_resource_filepath()` below.
OMNI_SUBDIR = "omni_extended"
# Default for `local_path` in `cache_omnie()` and `omnie_hourly()`.
LOCAL_PATH = _resource_filepath(OMNI_SUBDIR)
37
|
|
|
|
38
|
|
|
# Fill values that mark missing data, keyed by the column names used in
# `read_omnie()`.  `omnie_mask_missing()` replaces entries equal to these
# values by NaN; columns mapped to `None` are skipped there.
# Note that "Kp" is given on the scale returned by `read_omnie()`, which
# multiplies the parsed Kp column by 0.1 (presumably 99 in the raw file).
_OMNI_MISSING = {
    "year": None,
    "doy": None,
    "hour": None,
    "bsrn": 9999,
    "id_imf": 99,
    "id_sw": 99,
    "n_imf": 999,
    "n_plasma": 999,
    "B_mag_avg": 999.9,
    "B_mag": 999.9,
    "theta_B": 999.9,
    "phi_B": 999.9,
    "B_x": 999.9,
    "B_y_GSE": 999.9,
    "B_z_GSE": 999.9,
    "B_y_GSM": 999.9,
    "B_z_GSM": 999.9,
    "sigma_B_mag_avg": 999.9,
    "sigma_B_mag": 999.9,
    "sigma_B_x_GSE": 999.9,
    "sigma_B_y_GSE": 999.9,
    "sigma_B_z_GSE": 999.9,
    "T_p": 9999999.0,
    "n_p": 999.9,
    "v_plasma": 9999.0,
    "phi_v": 999.9,
    "theta_v": 999.9,
    "n_alpha_n_p": 9.999,
    "p_flow": 99.99,
    "sigma_T": 9999999.0,
    "sigma_n": 999.9,
    "sigma_v": 9999.0,
    "sigma_phi_v": 999.9,
    "sigma_theta_v": 999.9,
    "sigma_na_np": 9.999,
    "E": 999.99,
    "beta_plasma": 999.99,
    "mach": 999.9,
    "Kp": 9.9,
    "R": 999,
    "Dst": 99999,
    "AE": 9999,
    "p_01MeV": 999999.99,
    "p_02MeV": 99999.99,
    "p_04MeV": 99999.99,
    "p_10MeV": 99999.99,
    "p_30MeV": 99999.99,
    "p_60MeV": 99999.99,
    "flag": 0,
    "Ap": 999,
    "f107_adj": 999.9,
    "PC": 999.9,
    "AL": 99999,
    "AU": 99999,
    "mach_mag": 99.9,
    "Lya": 0.999999,
    "QI_p": 9.9999
}
97
|
|
|
|
98
|
|
|
|
99
|
|
|
def _doc_param(**sub):
    """Create a decorator that fills placeholders in a docstring

    The returned decorator runs ``str.format()`` with the given keyword
    arguments on the docstring of the decorated object and stores the
    result back in its ``__doc__`` attribute.

    Parameters
    ----------
    **sub:
        Replacement fields that are passed on to ``str.format()``.

    Returns
    -------
    dec: callable
        Decorator that returns the very same object it was applied to.
    """
    def dec(obj):
        # `__doc__` is `None` for undocumented objects and when docstrings
        # are stripped (``python -OO``); there is nothing to format then,
        # and calling `.format()` on `None` would raise at import time.
        if obj.__doc__ is not None:
            obj.__doc__ = obj.__doc__.format(**sub)
        return obj
    return dec
104
|
|
|
|
105
|
|
|
|
106
|
|
|
@_doc_param(prefix=OMNI_PREFIX, ext=OMNI_EXT)
def cache_omnie(
    year,
    prefix=None,
    ext=None,
    local_path=None,
    url_base=None,
):
    """Fetch one yearly OMNI2 (extended) file into the local cache

    Creates the local directory if needed and downloads the data file
    for `year` from [#]_, unless a file of that name is already there.

    .. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

    Parameters
    ----------
    year: int
        Year of the data.
    prefix: `None` or str, optional, default `None`
        First part of the file name, which is built as ``PREFIX_YYYY.EXT``.
        `None` defaults to '{prefix}'.
    ext: `None` or str, optional, default `None`
        Extension of the file name, which is built as ``PREFIX_YYYY.EXT``.
        `None` defaults to '{ext}'.
    local_path: `None` or str, optional, default `None`
        Directory in which the yearly files are stored locally.
        `None` uses the package's default file location.
    url_base: `None` or str, optional, default `None`
        URL of the remote directory that contains the yearly files.
        `None` uses the default base url.

    Returns
    -------
    Nothing.
    """
    # Fall back to the module defaults for everything that was not given.
    if not prefix:
        prefix = OMNI_PREFIX
    if not ext:
        ext = OMNI_EXT
    if not local_path:
        local_path = LOCAL_PATH
    if not url_base:
        url_base = OMNI_URL_BASE

    file_name = "{0}_{1:04d}.{2}".format(prefix, year, ext)

    # The cache directory is created even if no download turns out
    # to be necessary.
    if not os.path.exists(local_path):
        os.makedirs(local_path)

    target = os.path.join(local_path, file_name)
    if os.path.exists(target):
        # Already cached, nothing to do.
        return

    source_url = urljoin(url_base, file_name)
    logging.info("%s not found, downloading from %s.", target, source_url)
    _dl_file(target, source_url)
157
|
|
|
|
158
|
|
|
|
159
|
|
|
def omnie_mask_missing(df):
    """Replace the OMNI2 fill values by NaN

    Every column for which a fill ("missing") value is known gets the
    entries equal to that value replaced by NaN.
    The fill values follow the file format description at
    https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

    Parameters
    ----------
    df: pandas.DataFrame
        The OMNI2 data set, e.g. from :func:`omnie_hourly()` or :func:`read_omnie()`.

    Returns
    -------
    df: pandas.DataFrame
        A dataframe of the same layout with the missing values
        set to ``numpy.nan``.

    Notes
    -----
    The input is left untouched, the result is a copy.
    Integer columns that are masked are converted to float to support NaN.
    """
    masked = df.copy()
    for column in df.columns:
        fill_value = _OMNI_MISSING.get(column)
        # Columns without a known fill value are passed through as they are.
        if fill_value is not None:
            valid = df[column] != fill_value
            masked[column] = df[column].where(valid)
    return masked
189
|
|
|
|
190
|
|
|
|
191
|
|
|
def read_omnie(omnie_file):
    """Parse an OMNI2 extended data file [#]_ into a dataframe

    Reads the fixed-width Omni2 extended data files, available at [#]_,
    and returns the content as a :class:`pandas.DataFrame`.

    .. [#] https://omniweb.gsfc.nasa.gov/ow.html
    .. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

    Parameters
    ----------
    omnie_file: str
        File to parse, absolute path or relative to the current dir.

    Returns
    -------
    sw_df: pandas.DataFrame
        The parsed OMNI2 space weather data (hourly values).
        Details in
        https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

        Raises an ``IOError`` if the file is not found.
        The index is returned timezone-naive but contains UTC timestamps.
        To convert to a timezone-aware index, use
        :meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

        The dataframe contains the following columns:

        year:
            The observation year
        doy:
            Day of the year
        hour:
            Hour of the day
        bsrn:
            Bartels Solar Rotation Number.
        id_imf:
            ID for IMF spacecraft
        id_sw:
            ID for SW plasma spacecraft
        n_imf:
            Number of points in IMF averages
        n_plasma:
            Number of points in plasma averages
        B_mag_avg:
            Magnetic field magnitude average B
        B_mag:
            Magnetic field vector magnitude
        theta_B:
            Latitude angle of the magnetic field vector
        phi_B:
            Longitude angle of the magnetic field vector
        B_x:
            B_x GSE, GSM
        B_y_GSE:
            B_y GSE
        B_z_GSE:
            B_z GSE
        B_y_GSM:
            B_y GSM
        B_z_GSM:
            B_z GSM
        sigma_B_mag_avg:
            RMS standard deviation of B_mag_avg
        sigma_B_mag:
            RMS standard deviation of B_mag
        sigma_B_x_GSE:
            RMS standard deviation of B_x_GSE
        sigma_B_y_GSE:
            RMS standard deviation of B_y_GSE
        sigma_B_z_GSE:
            RMS standard deviation of B_z_GSE
        T_p:
            Proton temperature
        n_p:
            Proton density
        v_plasma:
            Plasma flow speed
        phi_v:
            Plasma flow longitude angle
        theta_v:
            Plasma flow latitude angle
        n_alpha_n_p:
            Alpha/Proton ratio
        p_flow:
            Flow pressure
        sigma_T:
            Standard deviation of T_p
        sigma_n:
            Standard deviation of n_p
        sigma_v:
            Standard deviation of v_plasma
        sigma_phi_v:
            Standard deviation of phi_v
        sigma_theta_v:
            Standard deviation of theta_v
        sigma_na_np:
            Standard deviation of n_alpha_n_p
        E:
            Electric field magnitude
        beta_plasma:
            Plasma beta
        mach:
            Alfvén Mach number
        Kp:
            Kp index value
        R:
            Sunspot number
        Dst:
            Dst index value
        AE:
            AE index value
        p_01MeV, p_02MeV, p_04MeV, p_10MeV, p_30MeV, p_60MeV:
            Proton fluxes >1 MeV, >2 MeV, >4 MeV, >10 MeV, >30 MeV, > 60 MeV
        flag:
            Flag (-1, ..., 6)
        Ap:
            Ap index value
        f107_adj:
            F10.7 radio flux at 1 AU
        PC:
            PC index value
        AL, AU:
            AL and AU index values
        mach_mag:
            Magnetosonic Mach number

        The extended dataset contains the additional columns:

        Lya:
            Solar Lyman-alpha irradiance
        QI_p:
            Proton QI
    """
    _assert_file_exists(omnie_file)
    # Record layout, one (name, width, dtype) entry per column, following
    # FORMAT(
    #   2I4,I3,I5,2I3,2I4,14F6.1,F9.0,F6.1,F6.0,2F6.1,F6.3,F6.2,
    #   F9.0,F6.1,F6.0,2F6.1,F6.3,2F7.2,F6.1,I3,I4,I6,I5,F10.2,
    #   5F9.2,I3,I4,2F6.1,2I6,F5.1,F9.6,F7.4
    # )
    layout = [
        # columns 1-8
        ("year", 4, "i4"), ("doy", 4, "i4"), ("hour", 3, "i4"),
        ("bsrn", 5, "i4"), ("id_imf", 3, "i4"), ("id_sw", 3, "i4"),
        ("n_imf", 4, "i4"), ("n_plasma", 4, "i4"),
        # columns 9-22
        ("B_mag_avg", 6, "f8"), ("B_mag", 6, "f8"),
        ("theta_B", 6, "f8"), ("phi_B", 6, "f8"),
        ("B_x", 6, "f8"), ("B_y_GSE", 6, "f8"), ("B_z_GSE", 6, "f8"),
        ("B_y_GSM", 6, "f8"), ("B_z_GSM", 6, "f8"),
        ("sigma_B_mag_avg", 6, "f8"), ("sigma_B_mag", 6, "f8"),
        ("sigma_B_x_GSE", 6, "f8"), ("sigma_B_y_GSE", 6, "f8"),
        ("sigma_B_z_GSE", 6, "f8"),
        # columns 23-29
        ("T_p", 9, "f8"), ("n_p", 6, "f8"), ("v_plasma", 6, "f8"),
        ("phi_v", 6, "f8"), ("theta_v", 6, "f8"),
        ("n_alpha_n_p", 6, "f8"), ("p_flow", 6, "f8"),
        # columns 30-35
        ("sigma_T", 9, "f8"), ("sigma_n", 6, "f8"), ("sigma_v", 6, "f8"),
        ("sigma_phi_v", 6, "f8"), ("sigma_theta_v", 6, "f8"),
        ("sigma_na_np", 6, "f8"),
        # columns 36-42
        ("E", 7, "f8"), ("beta_plasma", 7, "f8"), ("mach", 6, "f8"),
        ("Kp", 3, "i4"), ("R", 4, "i4"), ("Dst", 6, "i4"), ("AE", 5, "i4"),
        # columns 43-48
        ("p_01MeV", 10, "f8"), ("p_02MeV", 9, "f8"), ("p_04MeV", 9, "f8"),
        ("p_10MeV", 9, "f8"), ("p_30MeV", 9, "f8"), ("p_60MeV", 9, "f8"),
        # columns 49-57
        ("flag", 3, "i4"), ("Ap", 4, "i4"),
        ("f107_adj", 6, "f8"), ("PC", 6, "f8"),
        ("AL", 6, "i4"), ("AU", 6, "i4"),
        ("mach_mag", 5, "f8"), ("Lya", 9, "f8"), ("QI_p", 7, "f8"),
    ]
    col_names = [entry[0] for entry in layout]
    col_widths = [entry[1] for entry in layout]
    col_types = ",".join(entry[2] for entry in layout)

    records = np.genfromtxt(
        omnie_file,
        skip_header=0,
        delimiter=col_widths,
        dtype=col_types,
        names=col_names,
    )
    # Drop the records whose year reads as -1
    # (presumably numpy's fill value for integers it could not parse).
    records = records[records["year"] != -1]

    # Build the hourly time stamps from year, day of year, and hour.
    stamps = [
        "{0:04d}.{1:03d} {2:02d}".format(yy, dd, hh)
        for yy, dd, hh in records[["year", "doy", "hour"]]
    ]
    index = pd.to_datetime(stamps, format="%Y.%j %H")

    sw_df = pd.DataFrame(records, index=index)
    # Adjust Kp to 0...9
    sw_df["Kp"] = 0.1 * sw_df["Kp"]
    return sw_df
376
|
|
|
|
377
|
|
|
|
378
|
|
|
@_doc_param(prefix=OMNI_PREFIX, ext=OMNI_EXT)
def omnie_hourly(
    year,
    prefix=None,
    ext=None,
    local_path=None,
    url_base=None,
    cache=False,
):
    """Hourly OMNI data of the year `year`

    Reads the hourly OMNI data of the requested year from the
    locally stored yearly file.
    If the omni data are already available somewhere else,
    point `local_path` to that location.

    Parameters
    ----------
    year: int
        Year of the data.
    prefix: `None` or str, optional, default `None`
        First part of the file name, which is built as ``PREFIX_YYYY.EXT``.
        `None` defaults to '{prefix}'.
    ext: `None` or str, optional, default `None`
        Extension of the file name, which is built as ``PREFIX_YYYY.EXT``.
        `None` defaults to '{ext}'.
    local_path: `None` or str, optional, default `None`
        Directory in which the yearly files are stored locally.
        `None` uses the package's default file location.
    url_base: `None` or str, optional, default `None`
        URL of the remote directory that contains the yearly files.
        `None` uses the default base url.
    cache: boolean, optional, default False
        Download the file to `local_path` if it is not available there.

    Returns
    -------
    sw_df: pandas.DataFrame
        The parsed space weather data (hourly values).

        Raises an ``IOError`` if the file is not available.
        The index is returned timezone-naive but contains UTC timestamps.
        To convert to a timezone-aware index, use
        :meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

    See Also
    --------
    read_omnie
    """
    # Fall back to the module defaults for everything that was not given.
    if not prefix:
        prefix = OMNI_PREFIX
    if not ext:
        ext = OMNI_EXT
    if not local_path:
        local_path = LOCAL_PATH
    if not url_base:
        url_base = OMNI_URL_BASE

    file_name = "{0}_{1:04d}.{2}".format(prefix, year, ext)
    omnie_file = os.path.join(local_path, file_name)

    if os.path.exists(omnie_file):
        return read_omnie(omnie_file)

    # The file is missing: warn in any case, then either fetch it or
    # tell the user how to get it.
    warn("Could not find OMNI2 data {0}.".format(omnie_file))
    if cache:
        cache_omnie(
            year,
            prefix=prefix,
            ext=ext,
            local_path=local_path,
            url_base=url_base,
        )
    else:
        warn(
            "Local data files not found, pass `cache=True` "
            "or run `sw.cache_omnie()` to download the file."
        )

    # Without a download this call is left to report the missing file.
    return read_omnie(omnie_file)
452
|
|
|
|