Issues (4)

src/spaceweather/celestrak.py (1 issue)

Severity
1
# Copyright (c) 2020--2022 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices
9
10
Celestrak space weather indices file parser for python [#]_.
11
12
.. [#] https://celestrak.com/SpaceData/
13
"""
14
import os
15
from pkg_resources import resource_filename
16
import logging
17
from warnings import warn
18
19
import numpy as np
20
import pandas as pd
21
22
from .core import _assert_file_exists, _dl_file
23
24
__all__ = [
25
	"sw_daily", "ap_kp_3h", "read_sw",
26
	"get_file_age", "update_data",
27
	"SW_PATH_ALL", "SW_PATH_5Y",
28
]
29
30
DL_URL_ALL = "https://celestrak.com/SpaceData/SW-All.txt"
31
DL_URL_5Y = "https://celestrak.com/SpaceData/SW-Last5Years.txt"
32
SW_FILE_ALL = os.path.basename(DL_URL_ALL)
33
SW_FILE_5Y = os.path.basename(DL_URL_5Y)
34
SW_PATH_ALL = resource_filename(__name__, os.path.join("data", SW_FILE_ALL))
35
SW_PATH_5Y = resource_filename(__name__, os.path.join("data", SW_FILE_5Y))
36
37
38
def get_file_age(swpath, relative=True):
39
	"""Age of the downloaded data file
40
41
	Retrieves the last update time of the given file or full path.
42
43
	Parameters
44
	----------
45
	swpath: str
46
		Filename to check, absolute path or relative to the current dir.
47
	relative: bool, optional, default True
48
		Return the file's age (True) or the last update time (False).
49
50
	Returns
51
	-------
52
	upd: pandas.Timestamp or pandas.Timedelta
53
		The last updated time or the file age, depending on the setting
54
		of `relative` above.
55
		Raises ``IOError`` if the file is not found.
56
	"""
57
	_assert_file_exists(swpath)
58
	with open(swpath) as fp:
59
		for line in fp:
60
			if line.startswith("UPDATED"):
61
				# closes the file automatically
62
				break
63
	upd = pd.to_datetime(line.lstrip("UPDATED"), utc=True)
0 ignored issues
show
The variable line does not seem to be defined in case the for loop on line 59 is not entered. Are you sure this can never be the case?
Loading history...
64
	if relative:
65
		return pd.Timestamp.utcnow() - upd
66
	return upd
67
68
69
def update_data(
70
	min_age="3h",
71
	swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y,
72
	url_all=DL_URL_ALL, url_5y=DL_URL_5Y,
73
):
74
	"""Update the local space weather index data
75
76
	Updates the local space weather index data from the website [#]_,
77
	given that the 5-year file is older
78
	than `min_age`, or the combined (large) file is older than four years.
79
	If the data is missing for some reason, a download will be attempted nonetheless.
80
81
	All arguments are optional and changing them from the defaults should not
82
	be required neither should it be necessary nor is it recommended.
83
84
	.. [#] https://celestrak.com/SpaceData/
85
86
	Parameters
87
	----------
88
	min_age: str, optional, default "3h"
89
		The time after which a new download will be attempted.
90
		The online data is updated every 3 hours, thus setting this value to
91
		a shorter time is not needed and not recommended.
92
	swpath_all: str, optional, default depending on package install location
93
		Filename for the large combined index file including the
94
		historic data, absolute path or relative to the current dir.
95
	swpath_5y: str, optional, default depending on package install location
96
		Filename for the 5-year index file, absolute path or relative to the current dir.
97
	url_all: str, optional, default `sw.DL_URL_ALL`
98
		The url of the "historic" data file.
99
	url_5y: str, optional, default `sw.DL_URL_5Y`
100
		The url of the data file of containing the indices of the last 5 years.
101
102
	Returns
103
	-------
104
	Nothing.
105
	"""
106
	def _update_file(swpath, url, min_age):
107
		if not os.path.exists(swpath):
108
			logging.info("{0} not found, downloading.".format(swpath))
109
			_dl_file(swpath, url)
110
			return
111
		if get_file_age(swpath) < pd.Timedelta(min_age):
112
			logging.info("not updating '{0}'.".format(swpath))
113
			return
114
		logging.info("updating '{0}'.".format(swpath))
115
		_dl_file(swpath, url)
116
117
	# Update the large file after four years
118
	# to have some overlap with the 5-year data
119
	# 1460 = 4 * 365
120
	_update_file(swpath_all, url_all, "1460days")
121
	# Don't re-download before `min_age` has passed (3h)
122
	_update_file(swpath_5y, url_5y, min_age)
123
124
125
def read_sw(swpath):
126
	"""Read and parse space weather index data file
127
128
	Reads the given file and parses it according to the space weather data format.
129
130
	Parameters
131
	----------
132
	swpath: str
133
		File to parse, absolute path or relative to the current dir.
134
135
	Returns
136
	-------
137
	sw_df: pandas.DataFrame
138
		The parsed space weather data (daily values).
139
		Raises an ``IOError`` if the file is not found.
140
141
		The dataframe contains the following columns:
142
143
		"year", "month", "day":
144
			The observation date
145
		"bsrn":
146
			Bartels Solar Rotation Number.
147
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
148
		"rotd":
149
			Number of Day within the Bartels 27-day cycle (01-27).
150
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
151
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
152
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
153
		"Kpsum": Sum of the 8 Kp indices for the day.
154
			Expressed to the nearest third of a unit.
155
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
156
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
157
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
158
		"Apavg":
159
			Arithmetic average of the 8 Ap indices for the day.
160
		"Cp":
161
			Cp or Planetary Daily Character Figure. A qualitative estimate of
162
			overall level of magnetic activity for the day determined from the sum
163
			of the 8 Ap indices. Cp ranges, in steps of one-tenth, from 0 (quiet)
164
			to 2.5 (highly disturbed). "C9":
165
		"isn":
166
			International Sunspot Number.
167
			Records contain the Zurich number through 1980 Dec 31 and the
168
			International Brussels number thereafter.
169
		"f107_adj":
170
			10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU.
171
			Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until
172
			1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on.
173
			Expressed in units of 10-22 W/m2/Hz.
174
		"Q":
175
			Flux Qualifier.
176
			0 indicates flux required no adjustment;
177
			1 indicates flux required adjustment for burst in progress at time of measurement;
178
			2 indicates a flux approximated by either interpolation or extrapolation;
179
			3 indicates no observation; and
180
			4 indicates CSSI interpolation of missing data.
181
		"f107_81ctr_adj":
182
			Centered 81-day arithmetic average of F10.7 (adjusted).
183
		"f107_81lst_adj":
184
			Last 81-day arithmetic average of F10.7 (adjusted).
185
		"f107_obs":
186
			Observed (unadjusted) value of F10.7.
187
		"f107_81ctr_obs":
188
			Centered 81-day arithmetic average of F10.7 (observed).
189
		"f107_81lst_obs":
190
			Last 81-day arithmetic average of F10.7 (observed).
191
	"""
192
	_assert_file_exists(swpath)
193
	sw = np.genfromtxt(
194
		swpath,
195
		skip_header=3,
196
		delimiter=[
197
		#  yy mm dd br rd kp kp kp kp kp kp kp kp Kp
198
			4, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4,
199
		#  ap ap ap ap ap ap ap ap Ap cp c9 is f1  q
200
			4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 6, 2,
201
		#  f2 f3 f4 f5 f6
202
			6, 6, 6, 6, 6],
203
		dtype=(
204
			"i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,"
205
			"i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,i4,i4,f8,i4,"
206
			"f8,f8,f8,f8,f8"
207
		),
208
		names=[
209
			"year", "month", "day", "bsrn", "rotd",
210
			"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21", "Kpsum",
211
			"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg",
212
			"Cp", "C9", "isn", "f107_adj", "Q", "f107_81ctr_adj", "f107_81lst_adj",
213
			"f107_obs", "f107_81ctr_obs", "f107_81lst_obs"
214
		]
215
	)[2:-1]
216
	sw = sw[sw["year"] != -1]
217
	ts = pd.to_datetime([
218
		"{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd)
219
		for yy, mm, dd in sw[["year", "month", "day"]]
220
	])
221
	sw_df = pd.DataFrame(sw, index=ts)
222
	# Adjust Kp to 0...9
223
	kpns = list(map("Kp{0}".format, range(0, 23, 3))) + ["Kpsum"]
224
	sw_df[kpns] = 0.1 * sw_df[kpns]
225
	return sw_df
226
227
228
# Common arguments for the public daily and 3h interfaces
229
_SW_COMMON_PARAMS = """
230
	Parameters
231
	----------
232
	swpath_all: str, optional, default depending on package install location
233
		Filename for the large combined index file including the
234
		historic data, absolute path or relative to the current dir.
235
	swpath_5y: str, optional, default depending on package install location
236
		Filename for the 5-year index file, absolute path or relative to the current dir.
237
	update: bool, optional, default False
238
		Attempt to update the local data if it is older than `update_interval`.
239
	update_interval: str, optional, default "30days"
240
		The time after which the data are considered "old".
241
		By default, no automatic re-download is initiated, set `update` to true.
242
		The online data is updated every 3 hours, thus setting this value to
243
		a shorter time is not needed and not recommended.
244
"""
245
246
247
def _doc_param(**sub):
248
	def dec(obj):
249
		obj.__doc__ = obj.__doc__.format(**sub)
250
		return obj
251
	return dec
252
253
254
@_doc_param(params=_SW_COMMON_PARAMS)
255
def sw_daily(swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, update=False, update_interval="30days"):
256
	"""Combined daily Ap, Kp, and f10.7 index values
257
258
	Combines the "historic" and last-5-year data into one dataframe.
259
260
	All arguments are optional and changing them from the defaults should not
261
	be required neither should it be necessary nor is it recommended.
262
	{params}
263
	Returns
264
	-------
265
	sw_df: pandas.DataFrame
266
		The combined parsed space weather data (daily values).
267
		Raises ``IOError`` if the data files cannot be found.
268
269
	See Also
270
	--------
271
	ap_kp_3h, read_sw
272
	"""
273
	# ensure that the file exists and is up to date
274
	if (
275
		not os.path.exists(swpath_all)
276
		or not os.path.exists(swpath_5y)
277
	):
278
		warn("Could not find space weather data, trying to download.")
279
		update_data(swpath_all=swpath_all, swpath_5y=swpath_5y)
280
281
	if (
282
		# 1460 = 4 * 365
283
		get_file_age(swpath_all) > pd.Timedelta("1460days")
284
		or get_file_age(swpath_5y) > pd.Timedelta(update_interval)
285
	):
286
		if update:
287
			update_data(swpath_all=swpath_all, swpath_5y=swpath_5y)
288
		else:
289
			warn(
290
				"Local data files are older than {0}, pass `update=True` or "
291
				"run `sw.update_data()` manually if you need newer data.".format(
292
					update_interval
293
				)
294
			)
295
296
	df_all = read_sw(swpath_all)
297
	df_5y = read_sw(swpath_5y)
298
	return pd.concat([df_all[:df_5y.index[0]], df_5y[1:]])
299
300
301
@_doc_param(params=_SW_COMMON_PARAMS)
302
def ap_kp_3h(*args, **kwargs):
303
	"""3h values of Ap and Kp
304
305
	Provides the 3-hourly Ap and Kp indices from the full daily data set.
306
307
	Accepts the same arguments as `sw_daily()`.
308
	All arguments are optional and changing them from the defaults should not
309
	be required neither should it be necessary nor is it recommended.
310
	{params}
311
	Returns
312
	-------
313
	sw_df: pandas.DataFrame
314
		The combined Ap and Kp index data (3h values).
315
		The index values are centred at the 3h interval, i.e. at 01:30:00,
316
		04:30:00, 07:30:00, ... and so on.
317
		Raises ``IOError`` if the data files cannot be found.
318
319
	See Also
320
	--------
321
	sw_daily
322
	"""
323
	daily_df = sw_daily(*args, **kwargs)
324
	ret = daily_df.copy()
325
	apns = list(map("Ap{0}".format, range(0, 23, 3)))
326
	kpns = list(map("Kp{0}".format, range(0, 23, 3)))
327
	for i, (ap, kp) in enumerate(zip(apns, kpns)):
328
		ret[ap].index = daily_df[ap].index + pd.Timedelta((i * 3 + 1.5), unit="h")
329
		ret[kp].index = daily_df[kp].index + pd.Timedelta((i * 3 + 1.5), unit="h")
330
	sw_ap = pd.concat(map(ret.__getitem__, apns))
331
	sw_kp = pd.concat(map(ret.__getitem__, kpns))
332
	df = pd.DataFrame({"Ap": sw_ap, "Kp": sw_kp})
333
	return df.reindex(df.index.sort_values())
334