Passed
Push — master ( 190a35...75a1c3 )
by Stefan
01:20
created

spaceweather.omni.omnie_mask_missing()   A

Complexity

Conditions 3

Size

Total Lines 30
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nop 1
dl 0
loc 30
rs 9.95
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
# Copyright (c) 2022 Stefan Bender
3
#
4
# This module is part of pyspaceweather.
5
# pyspaceweather is free software: you can redistribute it or modify
6
# it under the terms of the GNU General Public License as published
7
# by the Free Software Foundation, version 2.
8
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
9
"""Python interface for OMNI space weather data
10
11
Omni2 [#]_ space weather data interface for python.
12
13
.. [#] https://omniweb.gsfc.nasa.gov/ow.html
14
"""
15
import os
16
from pkg_resources import resource_filename
17
import logging
18
from warnings import warn
19
20
import numpy as np
21
import pandas as pd
22
23
from .core import _assert_file_exists, _dl_file
24
25
# Names exported by ``from spaceweather.omni import *``.
__all__ = ["cache_omnie", "omnie_hourly", "omnie_mask_missing", "read_omnie"]

# Remote directory that contains the yearly OMNI2 (extended) files.
OMNI_URL_BASE = "https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended"
# The yearly files are named "<prefix>_<year>.<ext>".
OMNI_PREFIX = "omni2"
OMNI_EXT = "dat"
# Sub-directory of the package's "data" directory; together they form the
# default location of the local copies of the yearly files.
OMNI_SUBDIR = "omni_extended"
LOCAL_PATH = resource_filename(__name__, os.path.join("data", OMNI_SUBDIR))
36
37
# Fill values that mark missing data, keyed by column name.
# Columns mapped to `None` are skipped by `omnie_mask_missing()`.
# `Kp` is listed on the 0...9 scale that `read_omnie()` converts it to.
_OMNI_MISSING = {
	# Time stamp
	"year": None, "doy": None, "hour": None,
	# Bartels rotation number, spacecraft IDs, number of averaged points
	"bsrn": 9999,
	"id_imf": 99, "id_sw": 99,
	"n_imf": 999, "n_plasma": 999,
	# Magnetic field
	"B_mag_avg": 999.9, "B_mag": 999.9,
	"theta_B": 999.9, "phi_B": 999.9,
	"B_x": 999.9,
	"B_y_GSE": 999.9, "B_z_GSE": 999.9,
	"B_y_GSM": 999.9, "B_z_GSM": 999.9,
	"sigma_B_mag_avg": 999.9, "sigma_B_mag": 999.9,
	"sigma_B_x_GSE": 999.9, "sigma_B_y_GSE": 999.9, "sigma_B_z_GSE": 999.9,
	# Plasma
	"T_p": 9999999.0,
	"n_p": 999.9,
	"v_plasma": 9999.0,
	"phi_v": 999.9, "theta_v": 999.9,
	"n_alpha_n_p": 9.999,
	"p_flow": 99.99,
	"sigma_T": 9999999.0,
	"sigma_n": 999.9,
	"sigma_v": 9999.0,
	"sigma_phi_v": 999.9, "sigma_theta_v": 999.9,
	"sigma_na_np": 9.999,
	# Derived quantities
	"E": 999.99,
	"beta_plasma": 999.99,
	"mach": 999.9,
	# Indices
	"Kp": 9.9,
	"R": 999,
	"Dst": 99999,
	"AE": 9999,
	# Proton fluxes
	"p_01MeV": 999999.99,
	"p_02MeV": 99999.99, "p_04MeV": 99999.99, "p_10MeV": 99999.99,
	"p_30MeV": 99999.99, "p_60MeV": 99999.99,
	"flag": 0,
	# More indices
	"Ap": 999,
	"f107_adj": 999.9,
	"PC": 999.9,
	"AL": 99999, "AU": 99999,
	"mach_mag": 99.9,
	# Columns only present in the extended data set
	"Lya": 0.999999,
	"QI_p": 9.9999,
}
96
97
98
def cache_omnie(
	year,
	prefix=None,
	ext=None,
	local_path=None,
	url_base=None,
):
	"""Download OMNI2 data to local cache

	Downloads the yearly OMNI2 (extended) data file from [#]_ to the local
	location, unless a file with that name is already present there.
	The local directory is created if it does not exist.

	.. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

	Parameters
	----------
	year: int
		Year of the data.
	prefix: str, optional
		File prefix for constructing the file name as <prefix>_year.<ext>.
		Defaults to 'omni2'.
	ext: str, optional
		File extension for constructing the file name as <prefix>_year.<ext>.
		Defaults to 'dat'.
	local_path: str, optional
		Path to the locally stored data yearly files, defaults to the
		data location within the package.
	url_base: str, optional
		URL for the directory that contains the yearly files.

	Returns
	-------
	Nothing.
	"""
	prefix = prefix or OMNI_PREFIX
	ext = ext or OMNI_EXT
	local_path = local_path or LOCAL_PATH
	url_base = url_base or OMNI_URL_BASE

	basename = "{0}_{1:04d}.{2}".format(prefix, year, ext)
	omnie_file = os.path.join(local_path, basename)
	if os.path.exists(omnie_file):
		# Already cached, nothing to download.
		return

	if not os.path.exists(local_path):
		os.makedirs(local_path)

	# URLs are always separated by forward slashes; `os.path.join()` would
	# insert the platform's path separator (a backslash on Windows).
	url = "{0}/{1}".format(url_base.rstrip("/"), basename)
	logging.info("%s not found, downloading from %s.", omnie_file, url)
	_dl_file(omnie_file, url)
147
148
def omnie_mask_missing(df):
	"""Replace the OMNI2 fill values by NaN

	Every column of the OMNI2 data set has its own number that
	indicates missing data. These numbers are taken from the file
	format description at
	https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended
	and all entries equal to them are set to NaN.
	Columns without a known fill value are left as they are.

	Parameters
	----------
	df: pandas.DataFrame
		The OMNI2 data set, e.g. from ``omnie_hourly()`` or ``read_omnie()``.

	Returns
	-------
	df: pandas.DataFrame
		A copy of the dataframe with the missing values set to numpy.nan.

	Note
	----
	The input dataframe is not modified. Integer columns that contain
	missing values are converted to float in the result to support NaN.
	"""
	masked = df.copy()
	for col in df.columns:
		fill = _OMNI_MISSING.get(col)
		if fill is not None:
			values = df[col]
			# `where()` keeps the entries for which the condition holds
			# and sets all the others to NaN.
			masked[col] = values.where(values != fill)
	return masked
178
179
180
def read_omnie(omnie_file):
	"""Read and parse OMNI2 extended files [#]_

	Parses the Omni2 extended data files, available at [#]_,
	into a :class:`pandas.DataFrame`.

	.. [#] https://omniweb.gsfc.nasa.gov/ow.html
	.. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

	Parameters
	----------
	omnie_file: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed OMNI2 space weather data (hourly values),
		indexed by the time stamp constructed from year, day of year, and hour.
		Details in
		https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

		The dataframe contains the following columns:

		year:
			The observation year
		doy:
			Day of the year
		hour:
			Hour of the day
		bsrn:
			Bartels Solar Rotation Number.
		id_imf:
			ID for IMF spacecraft
		id_sw:
			ID for SW plasma spacecraft
		n_imf:
			Number of points in IMF averages
		n_plasma:
			Number of points in plasma averages
		B_mag_avg:
			Magnetic field magnitude average B
		B_mag:
			Magnetic field vector magnitude
		theta_B:
			Latitude angle of the magnetic field vector
		phi_B:
			Longitude angle of the magnetic field vector
		B_x:
			B_x GSE, GSM
		B_y_GSE:
			B_y GSE
		B_z_GSE:
			B_z GSE
		B_y_GSM:
			B_y GSM
		B_z_GSM:
			B_z GSM
		sigma_B_mag_avg:
			RMS standard deviation of B_mag_avg
		sigma_B_mag:
			RMS standard deviation of B_mag
		sigma_B_x_GSE:
			RMS standard deviation of B_x_GSE
		sigma_B_y_GSE:
			RMS standard deviation of B_y_GSE
		sigma_B_z_GSE:
			RMS standard deviation of B_z_GSE
		T_p:
			Proton temperature
		n_p:
			Proton density
		v_plasma:
			Plasma flow speed
		phi_v:
			Plasma flow longitude angle
		theta_v:
			Plasma flow latitude angle
		n_alpha_n_p:
			Alpha/Proton ratio
		p_flow:
			Flow pressure
		sigma_T:
			Standard deviation of T_p
		sigma_n:
			Standard deviation of n_p
		sigma_v:
			Standard deviation of v_plasma
		sigma_phi_v:
			Standard deviation of phi_v
		sigma_theta_v:
			Standard deviation of theta_v
		sigma_na_np:
			Standard deviation of n_alpha_n_p
		E:
			Electric field magnitude
		beta_plasma:
			Plasma beta
		mach:
			Alfvén Mach number
		Kp:
			Kp index value, rescaled to 0...9
		R:
			Sunspot number
		Dst:
			Dst index value
		AE:
			AE index value
		p_01MeV, p_02MeV, p_04MeV, p_10MeV, p_30MeV, p_60MeV:
			Proton fluxes >1 MeV, >2 MeV, >4 MeV, >10 MeV, >30 MeV, > 60 MeV
		flag:
			Flag (-1, ..., 6)
		Ap:
			Ap index value
		f107_adj:
			F10.7 radio flux at 1 AU
		PC:
			PC index value
		AL, AU:
			AL and AU index values
		mach_mag:
			Magnetosonic Mach number

		The extended dataset contains the additional columns:

		Lya:
			Solar Lyman-alpha irradiance
		QI_p:
			Proton QI

	Raises
	------
	IOError
		If the file is not found.
	"""
	_assert_file_exists(omnie_file)
	# The files have fixed-width columns, the field widths below
	# follow the Fortran format of the records:
	# FORMAT(
	#     2I4,I3,I5,2I3,2I4,14F6.1,F9.0,F6.1,F6.0,2F6.1,F6.3,F6.2,
	#     F9.0,F6.1,F6.0,2F6.1,F6.3,2F7.2,F6.1,I3,I4,I6,I5,F10.2,
	#     5F9.2,I3,I4,2F6.1,2I6,F5.1,F9.6,F7.4
	# )
	sw = np.genfromtxt(
		omnie_file,
		skip_header=0,
		delimiter=[
		#   1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
		#  yy dd hr br i1 i2 n1 n2  B B' tB fB Bx By Bz By Bz sB sB sB
			4, 4, 3, 5, 3, 3, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
		#  21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
		#  sB sB Tp np  v fv tv nr  p sT sn sv sf st sr  E bp  M Kp  R
			6, 6, 9, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 7, 7, 6, 3, 4,
		#  41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
		#  Ds AE p1 p2 p4p10p30p60 fl Apf10 PC AL AU Mm La QI
			6, 5,10, 9, 9, 9, 9, 9, 3, 4, 6, 6, 6, 6, 5, 9, 7,
		],
		dtype=(
			"i4,i4,i4,i4,i4,i4,i4,i4,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,"
			"f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,i4,i4,"
			"i4,i4,f8,f8,f8,f8,f8,f8,i4,i4,f8,f8,i4,i4,f8,f8,f8"
		),
		names=[
			"year", "doy", "hour", "bsrn", "id_imf", "id_sw", "n_imf", "n_plasma",
			"B_mag_avg", "B_mag", "theta_B", "phi_B",
			"B_x", "B_y_GSE", "B_z_GSE", "B_y_GSM", "B_z_GSM",
			"sigma_B_mag_avg", "sigma_B_mag",
			"sigma_B_x_GSE", "sigma_B_y_GSE", "sigma_B_z_GSE",
			"T_p", "n_p", "v_plasma", "phi_v", "theta_v", "n_alpha_n_p", "p_flow",
			"sigma_T", "sigma_n", "sigma_v",
			"sigma_phi_v", "sigma_theta_v", "sigma_na_np",
			"E", "beta_plasma", "mach", "Kp", "R", "Dst", "AE",
			"p_01MeV", "p_02MeV", "p_04MeV", "p_10MeV", "p_30MeV", "p_60MeV",
			"flag", "Ap", "f107_adj", "PC", "AL", "AU", "mach_mag", "Lya", "QI_p",
		]
	)
	# Drop records without a valid year; presumably these are lines that
	# could not be parsed, `genfromtxt()` fills missing integers with -1
	# by default -- TODO confirm.
	sw = sw[sw["year"] != -1]
	ts = pd.to_datetime(
		[
			"{0:04d}.{1:03d} {2:02d}".format(yy, dd, hh)
			for yy, dd, hh in sw[["year", "doy", "hour"]]
		],
		format="%Y.%j %H",
	)
	sw_df = pd.DataFrame(sw, index=ts)
	# Adjust Kp to 0...9, the file contains the values multiplied by ten.
	# Dividing by 10 gives the correctly rounded result, whereas multiplying
	# by 0.1 introduces representation errors (0.1 * 3 = 0.30000000000000004).
	sw_df["Kp"] = sw_df["Kp"] / 10
	return sw_df
362
363
364
def omnie_hourly(
	year,
	prefix=None,
	ext=None,
	local_path=None,
	url_base=None,
	cache=False,
):
	"""Hourly OMNI2 data of one year

	Reads the OMNI hourly data for `year` from the local copy of
	the yearly file. If that file is missing, a warning is issued and,
	with `cache=True`, the file is downloaded before reading it.
	Set `local_path` to use files that are stored outside of
	the package's data directory.

	Parameters
	----------
	year: int
		Year of the data.
	prefix: str, optional, default 'omni2'
		File prefix for constructing the file name as <prefix>_year.<ext>.
	ext: str, optional, default 'dat'
		File extension for constructing the file name as <prefix>_year.<ext>.
	local_path: str, optional
		Path to the locally stored data yearly files, defaults to the
		data location within the package.
	url_base: str, optional
		URL for the directory that contains the yearly files.
	cache: boolean, optional, default False
		Download files locally if they are not already available.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed space weather data (hourly values).

		Raises an ``IOError`` if the file is not available.

	See Also
	--------
	read_omnie
	"""
	# Fall back to the module defaults for everything that was not given.
	if not prefix:
		prefix = OMNI_PREFIX
	if not ext:
		ext = OMNI_EXT
	if not local_path:
		local_path = LOCAL_PATH
	if not url_base:
		url_base = OMNI_URL_BASE

	omnie_file = os.path.join(
		local_path,
		"{0}_{1:04d}.{2}".format(prefix, year, ext),
	)

	if os.path.exists(omnie_file):
		return read_omnie(omnie_file)

	# The file is not there: warn, and download it only if asked to.
	warn("Could not find OMNI2 data {0}.".format(omnie_file))
	if not cache:
		warn(
			"Local data files not found, pass `cache=True` "
			"or run `sw.cache_omnie()` to download the file."
		)
	else:
		cache_omnie(
			year,
			prefix=prefix,
			ext=ext,
			local_path=local_path,
			url_base=url_base,
		)

	# Reading fails at this point if the file is still not available.
	return read_omnie(omnie_file)
430