spaceweather.omni.read_omnie()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 185
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 32
nop 1
dl 0
loc 185
rs 9.112
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
# Copyright (c) 2022 Stefan Bender
3
#
4
# This module is part of pyspaceweather.
5
# pyspaceweather is free software: you can redistribute it or modify
6
# it under the terms of the GNU General Public License as published
7
# by the Free Software Foundation, version 2.
8
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
9
"""Python interface for OMNI space weather data
10
11
Omni2 [#]_ space weather data interface for Python.
12
13
.. [#] https://omniweb.gsfc.nasa.gov/ow.html
14
"""
15
import os
16
import logging
17
from warnings import warn
18
19
from posixpath import join as urljoin
20
21
import numpy as np
22
import pandas as pd
23
24
from .core import _assert_file_exists, _dl_file, _resource_filepath
25
26
# Names exported by `from ... import *`.
__all__ = ["cache_omnie", "omnie_hourly", "omnie_mask_missing", "read_omnie"]

# Remote directory that contains the yearly OMNI2 (extended) files.
OMNI_URL_BASE = "https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended"
# The yearly files are named "<prefix>_<year>.<ext>".
OMNI_PREFIX = "omni2"
OMNI_EXT = "dat"
# Sub-directory passed to `_resource_filepath()` to obtain the default
# local data location.
OMNI_SUBDIR = "omni_extended"
LOCAL_PATH = _resource_filepath(OMNI_SUBDIR)
37
38
_OMNI_MISSING = {
39
	"year": None,
40
	"doy": None,
41
	"hour": None,
42
	"bsrn": 9999,
43
	"id_imf": 99,
44
	"id_sw": 99,
45
	"n_imf": 999,
46
	"n_plasma": 999,
47
	"B_mag_avg": 999.9,
48
	"B_mag": 999.9,
49
	"theta_B": 999.9,
50
	"phi_B": 999.9,
51
	"B_x": 999.9,
52
	"B_y_GSE": 999.9,
53
	"B_z_GSE": 999.9,
54
	"B_y_GSM": 999.9,
55
	"B_z_GSM": 999.9,
56
	"sigma_B_mag_avg": 999.9,
57
	"sigma_B_mag": 999.9,
58
	"sigma_B_x_GSE": 999.9,
59
	"sigma_B_y_GSE": 999.9,
60
	"sigma_B_z_GSE": 999.9,
61
	"T_p": 9999999.0,
62
	"n_p": 999.9,
63
	"v_plasma": 9999.0,
64
	"phi_v": 999.9,
65
	"theta_v": 999.9,
66
	"n_alpha_n_p": 9.999,
67
	"p_flow": 99.99,
68
	"sigma_T": 9999999.0,
69
	"sigma_n": 999.9,
70
	"sigma_v": 9999.0,
71
	"sigma_phi_v": 999.9,
72
	"sigma_theta_v": 999.9,
73
	"sigma_na_np": 9.999,
74
	"E": 999.99,
75
	"beta_plasma": 999.99,
76
	"mach": 999.9,
77
	"Kp": 9.9,
78
	"R": 999,
79
	"Dst": 99999,
80
	"AE": 9999,
81
	"p_01MeV": 999999.99,
82
	"p_02MeV": 99999.99,
83
	"p_04MeV": 99999.99,
84
	"p_10MeV": 99999.99,
85
	"p_30MeV": 99999.99,
86
	"p_60MeV": 99999.99,
87
	"flag": 0,
88
	"Ap": 999,
89
	"f107_adj": 999.9,
90
	"PC": 999.9,
91
	"AL": 99999,
92
	"AU": 99999,
93
	"mach_mag": 99.9,
94
	"Lya": 0.999999,
95
	"QI_p": 9.9999
96
}
97
98
99
def _doc_param(**sub):
100
	def dec(obj):
101
		obj.__doc__ = obj.__doc__.format(**sub)
102
		return obj
103
	return dec
104
105
106
@_doc_param(prefix=OMNI_PREFIX, ext=OMNI_EXT)
def cache_omnie(
	year,
	prefix=None,
	ext=None,
	local_path=None,
	url_base=None,
):
	"""Download OMNI2 data to local cache

	Downloads the OMNI2 (extended) data file for `year` from [#]_
	to the local location, unless a file of that name is already there.

	.. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

	Parameters
	----------
	year: int
		Year of the data.
	prefix: `None` or str, optional, default `None`
		File prefix for constructing the file name as <prefix>_year.<ext>.
		`None` defaults to '{prefix}'.
	ext: `None` or str, optional, default `None`
		File extension for constructing the file name as <prefix>_year.<ext>.
		`None` defaults to '{ext}'.
	local_path: `None` or str, optional, default `None`
		Path to the locally stored yearly data files, it is created
		if it does not exist.
		`None` uses the package's default file location.
	url_base: `None` or str, optional, default `None`
		URL for the directory that contains the yearly files.
		`None` uses the default base url.

	Returns
	-------
	None
	"""
	prefix = prefix or OMNI_PREFIX
	ext = ext or OMNI_EXT
	local_path = local_path or LOCAL_PATH
	url_base = url_base or OMNI_URL_BASE

	basename = "{0}_{1:04d}.{2}".format(prefix, year, ext)

	# `exist_ok=True` instead of a separate existence check, such that
	# a directory created concurrently between check and creation
	# does not raise.
	os.makedirs(local_path, exist_ok=True)

	omnie_file = os.path.join(local_path, basename)
	if not os.path.exists(omnie_file):
		# `urljoin` is `posixpath.join`, the URL parts are joined with "/"
		url = urljoin(url_base, basename)
		logging.info("%s not found, downloading from %s.", omnie_file, url)
		_dl_file(omnie_file, url)
157
158
159
def omnie_mask_missing(df):
	"""Replace the OMNI2 fill values by NaN

	Looks up the fill value of each column of `df` in the module's
	table of missing-value markers and blanks out the matching entries.
	The fill values follow the file format description at
	https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

	Parameters
	----------
	df: pandas.DataFrame
		The OMNI2 data set, e.g. from :func:`omnie_hourly()` or :func:`read_omnie()`.

	Returns
	-------
	df: pandas.DataFrame
		A copy of the input in which the missing values are
		masked with ``numpy.nan``.

	Notes
	-----
	The input dataframe is left untouched.
	Columns without a registered fill value (unknown names, or the
	time columns whose entry is `None`) are copied unchanged.
	Integer columns cannot hold NaN, expect them to come back as
	float once values were masked.
	"""
	masked = df.copy()
	for name in df.columns:
		fill = _OMNI_MISSING.get(name)
		if fill is not None:
			column = df[name]
			# `where` keeps the entries that differ from the fill value
			# and sets all others to NaN
			masked[name] = column.where(column != fill)
	return masked
189
190
191
def read_omnie(omnie_file):
	"""Parse an OMNI2 extended file [#]_ into a dataframe

	Reads the fixed-width Omni2 extended data files, available at [#]_,
	and returns the content as a :class:`pandas.DataFrame`.

	.. [#] https://omniweb.gsfc.nasa.gov/ow.html
	.. [#] https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/

	Parameters
	----------
	omnie_file: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed OMNI2 space weather data (hourly values).
		Details in
		https://spdf.gsfc.nasa.gov/pub/data/omni/low_res_omni/extended/aareadme_extended

		Raises an ``IOError`` if the file is not found.
		The index is returned timezone-naive but contains UTC timestamps.
		To convert to a timezone-aware index, use
		:meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

		The dataframe contains the following columns:

		year:
			The observation year
		doy:
			Day of the year
		hour:
			Hour of the day
		bsrn:
			Bartels Solar Rotation Number.
		id_imf:
			ID for IMF spacecraft
		id_sw:
			ID for SW plasma spacecraft
		n_imf:
			Number of points in IMF averages
		n_plasma:
			Number of points in plasma averages
		B_mag_avg:
			Magnetic field magnitude average B
		B_mag:
			Magnetic field vector magnitude
		theta_B:
			Latitude angle of the magnetic field vector
		phi_B:
			Longitude angle of the magnetic field vector
		B_x:
			B_x GSE, GSM
		B_y_GSE:
			B_y GSE
		B_z_GSE:
			B_z GSE
		B_y_GSM:
			B_y GSM
		B_z_GSM:
			B_z GSM
		sigma_B_mag_avg:
			RMS standard deviation of B_mag_avg
		sigma_B_mag:
			RMS standard deviation of B_mag
		sigma_B_x_GSE:
			RMS standard deviation of B_x_GSE
		sigma_B_y_GSE:
			RMS standard deviation of B_y_GSE
		sigma_B_z_GSE:
			RMS standard deviation of B_z_GSE
		T_p:
			Proton temperature
		n_p:
			Proton density
		v_plasma:
			Plasma flow speed
		phi_v:
			Plasma flow longitude angle
		theta_v:
			Plasma flow latitude angle
		n_alpha_n_p:
			Alpha/Proton ratio
		p_flow:
			Flow pressure
		sigma_T:
			Standard deviation of T_p
		sigma_n:
			Standard deviation of n_p
		sigma_v:
			Standard deviation of v_plasma
		sigma_phi_v:
			Standard deviation of phi_v
		sigma_theta_v:
			Standard deviation of theta_v
		sigma_na_np:
			Standard deviation of n_alpha_n_p
		E:
			Electric field magnitude
		beta_plasma:
			Plasma beta
		mach:
			Alfvén Mach number
		Kp:
			Kp index value
		R:
			Sunspot number
		Dst:
			Dst index value
		AE:
			AE index value
		p_01MeV, p_02MeV, p_04MeV, p_10MeV, p_30MeV, p_60MeV:
			Proton fluxes >1 MeV, >2 MeV, >4 MeV, >10 MeV, >30 MeV, >60 MeV
		flag:
			Flag (-1, ..., 6)
		Ap:
			Ap index value
		f107_adj:
			F10.7 radio flux at 1 AU
		PC:
			PC index value
		AL, AU:
			AL and AU index values
		mach_mag:
			Magnetosonic Mach number

		The extended dataset contains the additional columns:

		Lya:
			Solar Lyman-alpha irradiance
		QI_p:
			Proton QI
	"""
	_assert_file_exists(omnie_file)

	# Record layout of the file in Fortran notation:
	# FORMAT(
	#     2I4,I3,I5,2I3,2I4,14F6.1,F9.0,F6.1,F6.0,2F6.1,F6.3,F6.2,
	#     F9.0,F6.1,F6.0,2F6.1,F6.3,2F7.2,F6.1,I3,I4,I6,I5,F10.2,
	#     5F9.2,I3,I4,2F6.1,2I6,F5.1,F9.6,F7.4
	# )
	# Character width of each of the 57 fields, in file order.
	field_widths = [
		# fields 1-20: year ... sigma_B_x_GSE
		4, 4, 3, 5, 3, 3, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
		# fields 21-40: sigma_B_y_GSE ... R
		6, 6, 9, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 7, 7, 6, 3, 4,
		# fields 41-57: Dst ... QI_p
		6, 5, 10, 9, 9, 9, 9, 9, 3, 4, 6, 6, 6, 6, 5, 9, 7,
	]
	# One numpy type code per field, same order and grouping as above.
	field_types = (
		"i4,i4,i4,i4,i4,i4,i4,i4,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,"
		"f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,f8,i4,i4,"
		"i4,i4,f8,f8,f8,f8,f8,f8,i4,i4,f8,f8,i4,i4,f8,f8,f8"
	)
	# Column names, see the docstring for their meaning.
	field_names = [
		"year", "doy", "hour", "bsrn", "id_imf", "id_sw", "n_imf", "n_plasma",
		"B_mag_avg", "B_mag", "theta_B", "phi_B",
		"B_x", "B_y_GSE", "B_z_GSE", "B_y_GSM", "B_z_GSM",
		"sigma_B_mag_avg", "sigma_B_mag",
		"sigma_B_x_GSE", "sigma_B_y_GSE", "sigma_B_z_GSE",
		"T_p", "n_p", "v_plasma", "phi_v", "theta_v", "n_alpha_n_p", "p_flow",
		"sigma_T", "sigma_n", "sigma_v",
		"sigma_phi_v", "sigma_theta_v", "sigma_na_np",
		"E", "beta_plasma", "mach", "Kp", "R", "Dst", "AE",
		"p_01MeV", "p_02MeV", "p_04MeV", "p_10MeV", "p_30MeV", "p_60MeV",
		"flag", "Ap", "f107_adj", "PC", "AL", "AU", "mach_mag", "Lya", "QI_p",
	]
	records = np.genfromtxt(
		omnie_file,
		skip_header=0,
		delimiter=field_widths,
		dtype=field_types,
		names=field_names,
	)
	# Drop the records with year -1.
	# NOTE(review): presumably these are lines that could not be parsed,
	# -1 is numpy's default fill value for missing integers -- confirm.
	valid = records["year"] != -1
	records = records[valid]

	# Build "YYYY.DDD HH" strings to parse year, day of year, and hour.
	stamps = [
		"{0:04d}.{1:03d} {2:02d}".format(yy, dd, hh)
		for yy, dd, hh in zip(records["year"], records["doy"], records["hour"])
	]
	index = pd.to_datetime(stamps, format="%Y.%j %H")

	sw_df = pd.DataFrame(records, index=index)
	# Kp is read as an integer column, scale it to 0...9
	sw_df["Kp"] = 0.1 * sw_df["Kp"]
	return sw_df
376
377
378
@_doc_param(prefix=OMNI_PREFIX, ext=OMNI_EXT)
def omnie_hourly(
	year,
	prefix=None,
	ext=None,
	local_path=None,
	url_base=None,
	cache=False,
):
	"""Hourly OMNI data of a single year

	Returns the OMNI hourly data for `year` from the yearly file
	in the local data directory.
	Point `local_path` to your own copy of the omni data to use
	that instead of the package's default location.

	Parameters
	----------
	year: int
		Year of the data.
	prefix: `None` or str, optional, default `None`
		File prefix for constructing the file name as <prefix>_year.<ext>.
		`None` defaults to '{prefix}'.
	ext: `None` or str, optional, default `None`
		File extension for constructing the file name as <prefix>_year.<ext>.
		`None` defaults to '{ext}'.
	local_path: `None` or str, optional, default `None`
		Path to the locally stored yearly data files.
		`None` uses the package's default file location.
	url_base: `None` or str, optional, default `None`
		URL for the directory that contains the yearly files.
		`None` uses the default base url.
	cache: boolean, optional, default False
		Download files locally if they are not already available.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed space weather data (hourly values).

		Raises an ``IOError`` if the file is not available.
		The index is returned timezone-naive but contains UTC timestamps.
		To convert to a timezone-aware index, use
		:meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

	See Also
	--------
	read_omnie
	"""
	# Fall back to the module defaults for everything left unset
	prefix = prefix if prefix else OMNI_PREFIX
	ext = ext if ext else OMNI_EXT
	local_path = local_path if local_path else LOCAL_PATH
	url_base = url_base if url_base else OMNI_URL_BASE

	omnie_file = os.path.join(
		local_path,
		"{0}_{1:04d}.{2}".format(prefix, year, ext),
	)

	if os.path.exists(omnie_file):
		return read_omnie(omnie_file)

	# The file is missing: warn, then fetch it or hint at how to get it.
	warn("Could not find OMNI2 data {0}.".format(omnie_file))
	if not cache:
		warn(
			"Local data files not found, pass `cache=True` "
			"or run `sw.cache_omnie()` to download the file."
		)
	else:
		cache_omnie(
			year,
			prefix=prefix,
			ext=ext,
			local_path=local_path,
			url_base=url_base,
		)

	# Attempted in either case, the reader handles a still-missing file
	return read_omnie(omnie_file)
452