Passed
Push — master ( cf4bc8...273399 )
by Stefan
04:24
created

spaceweather.celestrak   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 317
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 102
dl 0
loc 317
rs 10
c 0
b 0
f 0
wmc 17

6 Functions

Rating   Name   Duplication   Size   Complexity  
A read_sw() 0 89 1
A update_data() 0 54 3
A ap_kp_3h() 0 32 2
B sw_daily() 0 44 6
A _doc_param() 0 5 1
A get_file_age() 0 26 4
1
# Copyright (c) 2020--2022 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices
9
10
Celestrak space weather indices file parser for python [#]_.
11
12
.. [#] https://celestrak.com/SpaceData/
13
"""
14
import os
15
from pkg_resources import resource_filename
16
import logging
17
from warnings import warn
18
19
import numpy as np
20
import pandas as pd
21
22
from .core import _dl_file
23
24
__all__ = [
25
	"sw_daily", "ap_kp_3h", "read_sw",
26
	"get_file_age", "update_data",
27
	"SW_PATH_ALL", "SW_PATH_5Y",
28
]
29
30
DL_URL_ALL = "https://celestrak.com/SpaceData/SW-All.txt"
31
DL_URL_5Y = "https://celestrak.com/SpaceData/SW-Last5Years.txt"
32
SW_FILE_ALL = os.path.basename(DL_URL_ALL)
33
SW_FILE_5Y = os.path.basename(DL_URL_5Y)
34
SW_PATH_ALL = resource_filename(__name__, os.path.join("data", SW_FILE_ALL))
35
SW_PATH_5Y = resource_filename(__name__, os.path.join("data", SW_FILE_5Y))
36
37
38
def get_file_age(swpath, relative=True):
39
	"""Age of the downloaded data file
40
41
	Retrieves the last update time of the given file or full path.
42
43
	Parameters
44
	----------
45
	swpath: str
46
		Filename to check, absolute path or relative to the current dir.
47
	relative: bool, optional, default True
48
		Return the file's age (True) or the last update time (False).
49
50
	Returns
51
	-------
52
	upd: pd.Timestamp or pd.Timedelta
53
		The last updated time or the file age, depending on the setting
54
		of `relative` above.
55
	"""
56
	for line in open(swpath):
57
		if line.startswith("UPDATED"):
58
			# closes the file automatically
59
			break
60
	upd = pd.to_datetime(line.lstrip("UPDATED"), utc=True)
0 ignored issues
show
introduced by
The variable line does not seem to be defined in case the for loop on line 56 is not entered. Are you sure this can never be the case?
Loading history...
61
	if relative:
62
		return pd.Timestamp.utcnow() - upd
63
	return upd
64
65
66
def update_data(
67
	min_age="3h",
68
	swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y,
69
	url_all=DL_URL_ALL, url_5y=DL_URL_5Y,
70
):
71
	"""Update the local space weather index data
72
73
	Updates the local space weather index data from the website [#]_,
74
	given that the 5-year file is older
75
	than `min_age`, or the combined (large) file is older than four years.
76
	If the data is missing for some reason, a download will be attempted nonetheless.
77
78
	All arguments are optional and changing them from the defaults should not
79
	be required neither should it be necessary nor is it recommended.
80
81
	.. [#] https://celestrak.com/SpaceData/
82
83
	Parameters
84
	----------
85
	min_age: str, optional, default "3h"
86
		The time after which a new download will be attempted.
87
		The online data is updated every 3 hours, thus setting this value to
88
		a shorter time is not needed and not recommended.
89
	swpath_all: str, optional, default depending on package install location
90
		Filename for the large combined index file including the
91
		historic data, absolute path or relative to the current dir.
92
	swpath_5y: str, optional, default depending on package install location
93
		Filename for the 5-year index file, absolute path or relative to the current dir.
94
	url_all: str, optional, default `sw.DL_URL_ALL`
95
		The url of the "historic" data file.
96
	url_5y: str, optional, default `sw.DL_URL_5Y`
97
		The url of the data file of containing the indices of the last 5 years.
98
99
	Returns
100
	-------
101
	Nothing.
102
	"""
103
	def _update_file(swpath, url, min_age):
104
		if not os.path.exists(swpath):
105
			logging.info("{0} not found, downloading.".format(swpath))
106
			_dl_file(swpath, url)
107
			return
108
		if get_file_age(swpath) < pd.Timedelta(min_age):
109
			logging.info("not updating '{0}'.".format(swpath))
110
			return
111
		logging.info("updating '{0}'.".format(swpath))
112
		_dl_file(swpath, url)
113
114
	# Update the large file after four years
115
	# to have some overlap with the 5-year data
116
	# 1460 = 4 * 365
117
	_update_file(swpath_all, url_all, "1460days")
118
	# Don't re-download before `min_age` has passed (3h)
119
	_update_file(swpath_5y, url_5y, min_age)
120
121
122
def read_sw(swpath):
123
	"""Read and parse space weather index data file
124
125
	Reads the given file and parses it according to the space weather data format.
126
127
	Parameters
128
	----------
129
	swpath: str
130
		File to parse, absolute path or relative to the current dir.
131
132
	Returns
133
	-------
134
	sw_df: pd.Dataframe
135
		The parsed space weather data (daily values).
136
		The dataframe contains the following columns:
137
138
		"year", "month", "day":
139
			The observation date
140
		"bsrn":
141
			Bartels Solar Rotation Number.
142
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
143
		"rotd":
144
			Number of Day within the Bartels 27-day cycle (01-27).
145
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
146
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
147
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
148
		"Kpsum": Sum of the 8 Kp indices for the day.
149
			Expressed to the nearest third of a unit.
150
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
151
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
152
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
153
		"Apavg":
154
			Arithmetic average of the 8 Ap indices for the day.
155
		"Cp":
156
			Cp or Planetary Daily Character Figure. A qualitative estimate of
157
			overall level of magnetic activity for the day determined from the sum
158
			of the 8 Ap indices. Cp ranges, in steps of one-tenth, from 0 (quiet)
159
			to 2.5 (highly disturbed). "C9":
160
		"isn":
161
			International Sunspot Number.
162
			Records contain the Zurich number through 1980 Dec 31 and the
163
			International Brussels number thereafter.
164
		"f107_adj":
165
			10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU.
166
			Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until
167
			1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on.
168
			Expressed in units of 10-22 W/m2/Hz.
169
		"Q":
170
			Flux Qualifier.
171
			0 indicates flux required no adjustment;
172
			1 indicates flux required adjustment for burst in progress at time of measurement;
173
			2 indicates a flux approximated by either interpolation or extrapolation;
174
			3 indicates no observation; and
175
			4 indicates CSSI interpolation of missing data.
176
		"f107_81ctr_adj":
177
			Centered 81-day arithmetic average of F10.7 (adjusted).
178
		"f107_81lst_adj":
179
			Last 81-day arithmetic average of F10.7 (adjusted).
180
		"f107_obs":
181
			Observed (unadjusted) value of F10.7.
182
		"f107_81ctr_obs":
183
			Centered 81-day arithmetic average of F10.7 (observed).
184
		"f107_81lst_obs":
185
			Last 81-day arithmetic average of F10.7 (observed).
186
	"""
187
	sw = np.genfromtxt(
188
		swpath,
189
		skip_header=3,
190
				# yy mm dd br rd kp kp kp kp kp kp kp kp Kp ap ap ap ap ap ap ap ap Ap cp c9 is f1  q f2 f3 f4 f5 f6
191
		delimiter=[4, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 6, 2, 6, 6, 6, 6, 6],
192
		dtype=   "i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,i4,i4,f8,i4,f8,f8,f8,f8,f8",
193
		names=[
194
			"year", "month", "day", "bsrn", "rotd",
195
			"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21", "Kpsum",
196
			"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg",
197
			"Cp", "C9", "isn", "f107_adj", "Q", "f107_81ctr_adj", "f107_81lst_adj",
198
			"f107_obs", "f107_81ctr_obs", "f107_81lst_obs"
199
		]
200
	)[2:-1]
201
	sw = sw[sw["year"] != -1]
202
	ts = pd.to_datetime([
203
		"{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd)
204
		for yy, mm, dd in sw[["year", "month", "day"]]
205
	])
206
	sw_df = pd.DataFrame(sw, index=ts)
207
	# Adjust Kp to 0...9
208
	kpns = list(map("Kp{0}".format, range(0, 23, 3))) + ["Kpsum"]
209
	sw_df[kpns] = 0.1 * sw_df[kpns]
210
	return sw_df
211
212
213
# Common arguments for the public daily and 3h interfaces
214
_SW_COMMON_PARAMS = """
215
	Parameters
216
	----------
217
	swpath_all: str, optional, default depending on package install location
218
		Filename for the large combined index file including the
219
		historic data, absolute path or relative to the current dir.
220
	swpath_5y: str, optional, default depending on package install location
221
		Filename for the 5-year index file, absolute path or relative to the current dir.
222
	update: bool, optional, default False
223
		Attempt to update the local data if it is older than `update_interval`.
224
	update_interval: str, optional, default "30days"
225
		The time after which the data are considered "old".
226
		By default, no automatic re-download is initiated, set `update` to true.
227
		The online data is updated every 3 hours, thus setting this value to
228
		a shorter time is not needed and not recommended.
229
"""
230
231
232
def _doc_param(**sub):
233
	def dec(obj):
234
		obj.__doc__ = obj.__doc__.format(**sub)
235
		return obj
236
	return dec
237
238
239
@_doc_param(params=_SW_COMMON_PARAMS)
240
def sw_daily(swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, update=False, update_interval="30days"):
241
	"""Combined daily Ap, Kp, and f10.7 index values
242
243
	Combines the "historic" and last-5-year data into one dataframe.
244
245
	All arguments are optional and changing them from the defaults should not
246
	be required neither should it be necessary nor is it recommended.
247
	{params}
248
	Returns
249
	-------
250
	sw_df: pd.Dataframe
251
		The combined parsed space weather data (daily values).
252
253
	See Also
254
	--------
255
	ap_kp_3h, read_sw
256
	"""
257
	# ensure that the file exists and is up to date
258
	if (
259
		not os.path.exists(swpath_all)
260
		or not os.path.exists(swpath_5y)
261
	):
262
		warn("Could not find space weather data, trying to download.")
263
		update_data(swpath_all=swpath_all, swpath_5y=swpath_5y)
264
265
	if (
266
		# 1460 = 4 * 365
267
		get_file_age(swpath_all) > pd.Timedelta("1460days")
268
		or get_file_age(swpath_5y) > pd.Timedelta(update_interval)
269
	):
270
		if update:
271
			update_data(swpath_all=swpath_all, swpath_5y=swpath_5y)
272
		else:
273
			warn(
274
				"Local data files are older than {0}, pass `update=True` or "
275
				"run `sw.update_data()` manually if you need newer data.".format(
276
					update_interval
277
				)
278
			)
279
280
	df_all = read_sw(swpath_all)
281
	df_5y = read_sw(swpath_5y)
282
	return pd.concat([df_all[:df_5y.index[0]], df_5y[1:]])
283
284
285
@_doc_param(params=_SW_COMMON_PARAMS)
286
def ap_kp_3h(*args, **kwargs):
287
	"""3h values of Ap and Kp
288
289
	Provides the 3-hourly Ap and Kp indices from the full daily data set.
290
291
	Accepts the same arguments as `sw_daily()`.
292
	All arguments are optional and changing them from the defaults should not
293
	be required neither should it be necessary nor is it recommended.
294
	{params}
295
	Returns
296
	-------
297
	sw_df: pd.Dataframe
298
		The combined Ap and Kp index data (3h values).
299
		The index values are centred at the 3h interval, i.e. at 01:30:00,
300
		04:30:00, 07:30:00, ... and so on.
301
302
	See Also
303
	--------
304
	sw_daily
305
	"""
306
	daily_df = sw_daily(*args, **kwargs)
307
	ret = daily_df.copy()
308
	apns = list(map("Ap{0}".format, range(0, 23, 3)))
309
	kpns = list(map("Kp{0}".format, range(0, 23, 3)))
310
	for i, (ap, kp) in enumerate(zip(apns, kpns)):
311
		ret[ap].index = daily_df[ap].index + pd.Timedelta((i * 3 + 1.5), unit="h")
312
		ret[kp].index = daily_df[kp].index + pd.Timedelta((i * 3 + 1.5), unit="h")
313
	sw_ap = pd.concat(map(ret.__getitem__, apns))
314
	sw_kp = pd.concat(map(ret.__getitem__, kpns))
315
	df = pd.DataFrame({"Ap": sw_ap, "Kp": sw_kp})
316
	return df.reindex(df.index.sort_values())
317