spaceweather.omni.omnie_mask_missing()   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 30
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nop 1
dl 0
loc 30
rs 9.95
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
# Copyright (c) 2022 Stefan Bender
3
#
4
# This module is part of pyspaceweather.
5
# pyspaceweather is free software: you can redistribute it or modify
6
# it under the terms of the GNU General Public License as published
7
# by the Free Software Foundation, version 2.
8
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
9
"""Python interface for OMNI space weather data

Omni2 [#]_ space weather data interface for python.

.. [#] https://omniweb.gsfc.nasa.gov/ow.html
"""
15
import os
16
from pkg_resources import resource_filename
17
import logging
18
from warnings import warn
19
20
from posixpath import join as urljoin
21
22
import numpy as np
23
import pandas as pd
24
25
from .core import _assert_file_exists, _dl_file
26
27
# Public API of this module.
__all__ = [
	"cache_omnie",
	"omnie_hourly",
	"omnie_mask_missing",
	"read_omnie",
]

# Remote directory that holds the yearly OMNI2 (extended) files.
OMNI_URL_BASE = "https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended"
# Yearly files are named "<prefix>_<year>.<ext>", e.g. "omni2_2020.dat".
OMNI_PREFIX, OMNI_EXT = "omni2", "dat"
# Default local cache: the "data/omni_extended" directory inside the package.
OMNI_SUBDIR = "omni_extended"
LOCAL_PATH = resource_filename(__name__, os.path.join("data", OMNI_SUBDIR))
38
39
# Fill values that mark a missing entry, keyed by the column names that
# `read_omnie()` assigns.  `omnie_mask_missing()` skips columns that are
# mapped to ``None``, i.e. those are never masked.
# "Kp" is compared against 9.9 because `read_omnie()` scales that column
# by 0.1 after parsing.
_OMNI_MISSING = {
	# time stamp columns, never masked
	"year": None,
	"doy": None,
	"hour": None,
	# record bookkeeping
	"bsrn": 9999,
	"id_imf": 99,
	"id_sw": 99,
	"n_imf": 999,
	"n_plasma": 999,
	# magnetic field and its standard deviations
	"B_mag_avg": 999.9,
	"B_mag": 999.9,
	"theta_B": 999.9,
	"phi_B": 999.9,
	"B_x": 999.9,
	"B_y_GSE": 999.9,
	"B_z_GSE": 999.9,
	"B_y_GSM": 999.9,
	"B_z_GSM": 999.9,
	"sigma_B_mag_avg": 999.9,
	"sigma_B_mag": 999.9,
	"sigma_B_x_GSE": 999.9,
	"sigma_B_y_GSE": 999.9,
	"sigma_B_z_GSE": 999.9,
	# plasma parameters and their standard deviations
	"T_p": 9999999.0,
	"n_p": 999.9,
	"v_plasma": 9999.0,
	"phi_v": 999.9,
	"theta_v": 999.9,
	"n_alpha_n_p": 9.999,
	"p_flow": 99.99,
	"sigma_T": 9999999.0,
	"sigma_n": 999.9,
	"sigma_v": 9999.0,
	"sigma_phi_v": 999.9,
	"sigma_theta_v": 999.9,
	"sigma_na_np": 9.999,
	# derived quantities
	"E": 999.99,
	"beta_plasma": 999.99,
	"mach": 999.9,
	# indices
	"Kp": 9.9,
	"R": 999,
	"Dst": 99999,
	"AE": 9999,
	# proton fluxes and their flag
	"p_01MeV": 999999.99,
	"p_02MeV": 99999.99,
	"p_04MeV": 99999.99,
	"p_10MeV": 99999.99,
	"p_30MeV": 99999.99,
	"p_60MeV": 99999.99,
	"flag": 0,
	# further indices
	"Ap": 999,
	"f107_adj": 999.9,
	"PC": 999.9,
	"AL": 99999,
	"AU": 99999,
	"mach_mag": 99.9,
	# columns only present in the extended data set
	"Lya": 0.999999,
	"QI_p": 9.9999,
}
98
99
100
def cache_omnie(
	year,
	prefix=None,
	ext=None,
	local_path=None,
	url_base=None,
):
	"""Download OMNI2 data to local cache

	Downloads the yearly OMNI2 (extended) data file from [#]_ into
	`local_path`, unless a file of that name already exists there.
	The target directory is created if it does not exist yet.

	.. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

	Parameters
	----------
	year: int
		Year of the data.
	prefix: str, optional
		File prefix for constructing the file name as <prefix>_year.<ext>.
		Defaults to 'omni2'.
	ext: str, optional
		File extension for constructing the file name as <prefix>_year.<ext>.
		Defaults to 'dat'.
	local_path: str, optional
		Path to the locally stored data yearly files, defaults to the
		data location within the package.
	url_base: str, optional
		URL for the directory that contains the yearly files.

	Returns
	-------
	None
	"""
	# Empty or missing arguments fall back to the module defaults.
	prefix = prefix or OMNI_PREFIX
	ext = ext or OMNI_EXT
	local_path = local_path or LOCAL_PATH
	url_base = url_base or OMNI_URL_BASE

	basename = "{0}_{1:04d}.{2}".format(prefix, year, ext)

	if not os.path.exists(local_path):
		os.makedirs(local_path)

	omnie_file = os.path.join(local_path, basename)
	# Only download when the file is not cached yet.
	if not os.path.exists(omnie_file):
		# `urljoin` is `posixpath.join`, so the URL is always joined with "/".
		url = urljoin(url_base, basename)
		logging.info("%s not found, downloading from %s.", omnie_file, url)
		_dl_file(omnie_file, url)
148
149
150
def omnie_mask_missing(df, missing=None):
	"""Mask missing values with NaN

	Marks missing values in the OMNI2 data set by NaN.
	The missing value indicating numbers are taken from the file format description
	https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

	Parameters
	----------
	df: pandas.DataFrame
		The OMNI2 data set, e.g. from :func:`omnie_hourly()` or :func:`read_omnie()`.
	missing: dict, optional
		Mapping of column name to the number that marks a missing value
		in that column. Columns that are not in the mapping, or that are
		mapped to ``None``, are left untouched.
		Defaults to the module's table for the OMNI2 extended columns.

	Returns
	-------
	df: pandas.DataFrame
		A copy of the dataframe with the missing values masked with ``numpy.nan``.

	Notes
	-----
	The input dataframe is not modified. Integer columns that contain
	missing values are converted to float to support NaN.
	"""
	fill_values = _OMNI_MISSING if missing is None else missing
	res = df.copy()
	for col in df.columns:
		fill = fill_values.get(col)
		if fill is None:
			# No fill value known for this column, keep it as it is.
			continue
		column = df[col]
		# `where` keeps the entries for which the condition holds
		# and replaces all others by NaN.
		res[col] = column.where(column != fill)
	return res
180
181
182
def read_omnie(omnie_file):
	"""Read and parse OMNI2 extended files [#]_

	Parses the Omni2 extended data files, available at [#]_,
	into a :class:`pandas.DataFrame`.

	.. [#] https://omniweb.gsfc.nasa.gov/ow.html
	.. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

	Parameters
	----------
	omnie_file: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed OMNI2 space weather data (hourly values),
		indexed by the time stamp built from year, day of year, and hour.
		Details in
		https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

		The dataframe contains the following columns:

		year:
			The observation year
		doy:
			Day of the year
		hour:
			Hour of the day
		bsrn:
			Bartels Solar Rotation Number.
		id_imf:
			ID for IMF spacecraft
		id_sw:
			ID for SW plasma spacecraft
		n_imf:
			Number of points in IMF averages
		n_plasma:
			Number of points in plasma averages
		B_mag_avg:
			Magnetic field magnitude average B
		B_mag:
			Magnetic field vector magnitude
		theta_B:
			Latitude angle of the magnetic field vector
		phi_B:
			Longitude angle of the magnetic field vector
		B_x:
			B_x GSE, GSM
		B_y_GSE:
			B_y GSE
		B_z_GSE:
			B_z GSE
		B_y_GSM:
			B_y GSM
		B_z_GSM:
			B_z GSM
		sigma_B_mag_avg:
			RMS standard deviation of B_mag_avg
		sigma_B_mag:
			RMS standard deviation of B_mag
		sigma_B_x_GSE:
			RMS standard deviation of B_x_GSE
		sigma_B_y_GSE:
			RMS standard deviation of B_y_GSE
		sigma_B_z_GSE:
			RMS standard deviation of B_z_GSE
		T_p:
			Proton temperature
		n_p:
			Proton density
		v_plasma:
			Plasma flow speed
		phi_v:
			Plasma flow longitude angle
		theta_v:
			Plasma flow latitude angle
		n_alpha_n_p:
			Alpha/Proton ratio
		p_flow:
			Flow pressure
		sigma_T:
			Standard deviation of T_p
		sigma_n:
			Standard deviation of n_p
		sigma_v:
			Standard deviation of v_plasma
		sigma_phi_v:
			Standard deviation of phi_v
		sigma_theta_v:
			Standard deviation of theta_v
		sigma_na_np:
			Standard deviation of n_alpha_n_p
		E:
			Electric field magnitude
		beta_plasma:
			Plasma beta
		mach:
			Alfvén Mach number
		Kp:
			Kp index value
		R:
			Sunspot number
		Dst:
			Dst index value
		AE:
			AE index value
		p_01MeV, p_02MeV, p_04MeV, p_10MeV, p_30MeV, p_60MeV:
			Proton fluxes >1 MeV, >2 MeV, >4 MeV, >10 MeV, >30 MeV, > 60 MeV
		flag:
			Flag (-1, ..., 6)
		Ap:
			Ap index value
		f107_adj:
			F10.7 radio flux at 1 AU
		PC:
			PC index value
		AL, AU:
			AL and AU index values
		mach_mag:
			Magnetosonic Mach number

		The extended dataset contains the additional columns:

		Lya:
			Solar Lyman-alpha irradiance
		QI_p:
			Proton QI

	Raises
	------
	IOError
		If the file is not found.
	"""
	_assert_file_exists(omnie_file)
	# Fixed-width record layout, one entry per column:
	# (column name, field width in characters, numpy dtype).
	# It follows the Fortran format of the data files:
	# FORMAT(
	#     2I4,I3,I5,2I3,2I4,14F6.1,F9.0,F6.1,F6.0,2F6.1,F6.3,F6.2,
	#     F9.0,F6.1,F6.0,2F6.1,F6.3,2F7.2,F6.1,I3,I4,I6,I5,F10.2,
	#     5F9.2,I3,I4,2F6.1,2I6,F5.1,F9.6,F7.4
	# )
	columns = [
		# columns 1 - 8
		("year", 4, "i4"),
		("doy", 4, "i4"),
		("hour", 3, "i4"),
		("bsrn", 5, "i4"),
		("id_imf", 3, "i4"),
		("id_sw", 3, "i4"),
		("n_imf", 4, "i4"),
		("n_plasma", 4, "i4"),
		# columns 9 - 22
		("B_mag_avg", 6, "f8"),
		("B_mag", 6, "f8"),
		("theta_B", 6, "f8"),
		("phi_B", 6, "f8"),
		("B_x", 6, "f8"),
		("B_y_GSE", 6, "f8"),
		("B_z_GSE", 6, "f8"),
		("B_y_GSM", 6, "f8"),
		("B_z_GSM", 6, "f8"),
		("sigma_B_mag_avg", 6, "f8"),
		("sigma_B_mag", 6, "f8"),
		("sigma_B_x_GSE", 6, "f8"),
		("sigma_B_y_GSE", 6, "f8"),
		("sigma_B_z_GSE", 6, "f8"),
		# columns 23 - 35
		("T_p", 9, "f8"),
		("n_p", 6, "f8"),
		("v_plasma", 6, "f8"),
		("phi_v", 6, "f8"),
		("theta_v", 6, "f8"),
		("n_alpha_n_p", 6, "f8"),
		("p_flow", 6, "f8"),
		("sigma_T", 9, "f8"),
		("sigma_n", 6, "f8"),
		("sigma_v", 6, "f8"),
		("sigma_phi_v", 6, "f8"),
		("sigma_theta_v", 6, "f8"),
		("sigma_na_np", 6, "f8"),
		# columns 36 - 42
		("E", 7, "f8"),
		("beta_plasma", 7, "f8"),
		("mach", 6, "f8"),
		("Kp", 3, "i4"),
		("R", 4, "i4"),
		("Dst", 6, "i4"),
		("AE", 5, "i4"),
		# columns 43 - 50
		("p_01MeV", 10, "f8"),
		("p_02MeV", 9, "f8"),
		("p_04MeV", 9, "f8"),
		("p_10MeV", 9, "f8"),
		("p_30MeV", 9, "f8"),
		("p_60MeV", 9, "f8"),
		("flag", 3, "i4"),
		("Ap", 4, "i4"),
		# columns 51 - 57
		("f107_adj", 6, "f8"),
		("PC", 6, "f8"),
		("AL", 6, "i4"),
		("AU", 6, "i4"),
		("mach_mag", 5, "f8"),
		("Lya", 9, "f8"),
		("QI_p", 7, "f8"),
	]
	names = [name for name, _, _ in columns]
	widths = [width for _, width, _ in columns]
	dtypes = ",".join(dtype for _, _, dtype in columns)

	sw = np.genfromtxt(
		omnie_file,
		skip_header=0,
		# a list of integers is interpreted as fixed field widths
		delimiter=widths,
		dtype=dtypes,
		names=names,
	)
	# Drop records without a valid year; presumably these are entries
	# that `genfromtxt` filled with its integer default of -1.
	sw = sw[sw["year"] != -1]
	ts = pd.to_datetime(
		[
			"{0:04d}.{1:03d} {2:02d}".format(yy, dd, hh)
			for yy, dd, hh in zip(sw["year"], sw["doy"], sw["hour"])
		],
		format="%Y.%j %H",
	)
	sw_df = pd.DataFrame(sw, index=ts)
	# Adjust Kp to 0...9
	sw_df["Kp"] = 0.1 * sw_df["Kp"]
	return sw_df
364
365
366
def omnie_hourly(
	year,
	prefix=None,
	ext=None,
	local_path=None,
	url_base=None,
	cache=False,
):
	"""OMNI hourly data for year `year`

	Loads the OMNI hourly data for the given year,
	from the locally cached data.
	Use `local_path` to set a custom location if you
	have the omni data already available.

	Parameters
	----------
	year: int
		Year of the data.
	prefix: str, optional, default 'omni2'
		File prefix for constructing the file name as <prefix>_year.<ext>.
	ext: str, optional, default 'dat'
		File extension for constructing the file name as <prefix>_year.<ext>.
	local_path: str, optional
		Path to the locally stored data yearly files, defaults to the
		data location within the package.
	url_base: str, optional
		URL for the directory that contains the yearly files.
	cache: boolean, optional, default False
		Download files locally if they are not already available.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed space weather data (hourly values).

	Raises
	------
	IOError
		If the file is not available.

	See Also
	--------
	read_omnie
	"""
	# Empty or missing arguments fall back to the module defaults.
	prefix = prefix or OMNI_PREFIX
	ext = ext or OMNI_EXT
	local_path = local_path or LOCAL_PATH
	url_base = url_base or OMNI_URL_BASE

	basename = "{0}_{1:04d}.{2}".format(prefix, year, ext)
	omnie_file = os.path.join(local_path, basename)

	# ensure that the file exists
	if not os.path.exists(omnie_file):
		warn("Could not find OMNI2 data {0}.".format(omnie_file))
		if cache:
			cache_omnie(
				year,
				prefix=prefix, ext=ext,
				local_path=local_path, url_base=url_base,
			)
		else:
			# Nothing is downloaded without `cache=True`;
			# the read below is still attempted.
			warn(
				"Local data files not found, pass `cache=True` "
				"or run `sw.cache_omnie()` to download the file."
			)

	return read_omnie(omnie_file)
432