spaceweather.celestrak.update_data()   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 63
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 20
nop 5
dl 0
loc 63
rs 9.4
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
# Copyright (c) 2020--2022 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices
9
10
Celestrak space weather indices file parser for python [#]_.
11
12
.. [#] https://celestrak.com/SpaceData/
13
"""
14
import os
15
import logging
16
from warnings import warn
17
18
import numpy as np
19
import pandas as pd
20
21
from .core import _assert_file_exists, _dl_file, _resource_filepath
22
23
# Public names of this module.
__all__ = [
	"sw_daily",
	"ap_kp_3h",
	"read_sw",
	"get_file_age",
	"update_data",
	"SW_PATH_ALL",
	"SW_PATH_5Y",
]

# Download urls of the full ("all") and the last-5-years index files.
DL_URL_ALL = "https://celestrak.com/SpaceData/SW-All.txt"
DL_URL_5Y = "https://celestrak.com/SpaceData/SW-Last5Years.txt"
# The local file names are the last path components of the urls.
SW_FILE_ALL, SW_FILE_5Y = (
	os.path.basename(_url) for _url in (DL_URL_ALL, DL_URL_5Y)
)
# Default local file locations used when no explicit path is passed.
SW_PATH_ALL, SW_PATH_5Y = (
	_resource_filepath(_fname) for _fname in (SW_FILE_ALL, SW_FILE_5Y)
)
35
36
37
def get_file_age(swpath, relative=True):
	"""Age of the downloaded data file

	Retrieves the last update time of the given file or full path.
	The time is read from the first line of the file that starts
	with "UPDATED".

	Parameters
	----------
	swpath: str
		Filename to check, absolute path or relative to the current dir.
	relative: bool, optional, default True
		Return the file's age (True) or the last update time (False).

	Returns
	-------
	upd: pandas.Timestamp or pandas.Timedelta
		The last updated time or the file age, depending on the setting
		of `relative` above.

	Raises
	------
	IOError
		If the file is not found.
	ValueError
		If the file contains no line starting with "UPDATED",
		or if the time on that line cannot be parsed.
	"""
	_assert_file_exists(swpath)
	prefix = "UPDATED"
	upd_str = None
	with open(swpath) as fp:
		for line in fp:
			if line.startswith(prefix):
				# Cut off exactly the prefix; `str.lstrip()` would strip
				# a set of characters instead of the leading word.
				upd_str = line[len(prefix):]
				break
	if upd_str is None:
		# Covers empty files as well as files without the header line,
		# instead of parsing an unrelated line or failing on an unbound name.
		raise ValueError(
			"No 'UPDATED' line found in '{0}'.".format(swpath)
		)
	upd = pd.to_datetime(upd_str, utc=True)
	if relative:
		# Timezone-aware "now", to match the UTC-aware update time
		return pd.Timestamp.now(tz="UTC") - upd
	return upd
66
67
68
def update_data(
	min_age="3h",
	swpath_all=None, swpath_5y=None,
	url_all=None, url_5y=None,
):
	"""Update the local space weather index data

	Downloads the space weather index files from the website [#]_.
	The 5-year file is fetched again once it is older than `min_age`,
	the combined (large) file once it is older than four years.
	A file that is missing locally is always downloaded.

	All arguments are optional; changing them from the defaults
	should not be necessary and is not recommended.

	.. [#] https://celestrak.com/SpaceData/

	Parameters
	----------
	min_age: str, optional, default "3h"
		The time after which a new download will be attempted.
		The online data is updated every 3 hours, thus setting this value to
		a shorter time is not needed and not recommended.
	swpath_all: `None` or str, optional, default `None`
		Filename for the large combined index file including the
		historic data, absolute path or relative to the current dir.
		`None` uses the package's default file location.
	swpath_5y: `None` or str, optional, default `None`
		Filename for the 5-year index file, absolute path or relative to the current dir.
		`None` uses the package's default file location.
	url_all: `None` or str, optional, default `None`
		The url of the "historic" data file.
		`None` uses the default url.
	url_5y: `None` or str, optional, default `None`
		The url of the data file containing the indices of the last 5 years.
		`None` uses the default url.

	Returns
	-------
	Nothing.
	"""
	def _refresh(path, source, max_age):
		# Pick the log message first; only the "fresh enough" case
		# returns without downloading.
		if not os.path.exists(path):
			msg = "{0} not found, downloading."
		elif get_file_age(path) < pd.Timedelta(max_age):
			logging.info("not updating '{0}'.".format(path))
			return
		else:
			msg = "updating '{0}'."
		logging.info(msg.format(path))
		_dl_file(path, source)

	# Resolve all defaults before touching any file.
	jobs = (
		# The large file is refreshed after four years (1460 = 4 * 365 days)
		# to have some overlap with the 5-year data.
		(swpath_all or SW_PATH_ALL, url_all or DL_URL_ALL, "1460days"),
		# Don't re-download the 5-year file before `min_age` has passed (3h).
		(swpath_5y or SW_PATH_5Y, url_5y or DL_URL_5Y, min_age),
	)
	for job in jobs:
		_refresh(*job)
131
132
133
def read_sw(swpath):
	"""Read and parse space weather index data file

	Reads the given fixed-width text file and parses it according to
	the space weather data format.

	Parameters
	----------
	swpath: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	sw_df: pandas.DataFrame
		The parsed space weather data (daily values).
		Raises an ``IOError`` if the file is not found.
		The index is returned timezone-naive but contains UTC timestamps.
		To convert to a timezone-aware index, use
		:meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

		The dataframe contains the following columns:

		"year", "month", "day":
			The observation date.
		"bsrn":
			Bartels Solar Rotation Number.
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
		"rotd":
			Number of Day within the Bartels 27-day cycle (01-27).
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT.
		"Kpsum":
			Sum of the 8 Kp indices for the day.
			Expressed to the nearest third of a unit.
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT.
		"Apavg":
			Arithmetic average of the 8 Ap indices for the day.
		"Cp":
			Cp or Planetary Daily Character Figure. A qualitative estimate of
			overall level of magnetic activity for the day determined from the sum
			of the 8 Ap indices. Cp ranges, in steps of one-tenth, from 0 (quiet)
			to 2.5 (highly disturbed).
		"C9":
			Conversion of the 0-to-2.5 range of the Cp index to a single
			digit between 0 and 9.
		"isn":
			International Sunspot Number.
			Records contain the Zurich number through 1980 Dec 31 and the
			International Brussels number thereafter.
		"f107_adj":
			10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU.
			Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until
			1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on.
			Expressed in units of 10^-22 W/m^2/Hz.
		"Q":
			Flux Qualifier.
			0 indicates flux required no adjustment;
			1 indicates flux required adjustment for burst in progress at time of measurement;
			2 indicates a flux approximated by either interpolation or extrapolation;
			3 indicates no observation; and
			4 indicates CSSI interpolation of missing data.
		"f107_81ctr_adj":
			Centered 81-day arithmetic average of F10.7 (adjusted).
		"f107_81lst_adj":
			Last 81-day arithmetic average of F10.7 (adjusted).
		"f107_obs":
			Observed (unadjusted) value of F10.7.
		"f107_81ctr_obs":
			Centered 81-day arithmetic average of F10.7 (observed).
		"f107_81lst_obs":
			Last 81-day arithmetic average of F10.7 (observed).
	"""
	_assert_file_exists(swpath)
	# Fixed-width record layout, one entry per column:
	# (column name, field width in characters, numpy dtype code)
	layout = [
		("year", 4, "i4"), ("month", 3, "i4"), ("day", 3, "i4"),
		("bsrn", 5, "i4"), ("rotd", 3, "i4"),
		("Kp0", 3, "i4"), ("Kp3", 3, "i4"), ("Kp6", 3, "i4"), ("Kp9", 3, "i4"),
		("Kp12", 3, "i4"), ("Kp15", 3, "i4"), ("Kp18", 3, "i4"), ("Kp21", 3, "i4"),
		("Kpsum", 4, "i4"),
		("Ap0", 4, "i4"), ("Ap3", 4, "i4"), ("Ap6", 4, "i4"), ("Ap9", 4, "i4"),
		("Ap12", 4, "i4"), ("Ap15", 4, "i4"), ("Ap18", 4, "i4"), ("Ap21", 4, "i4"),
		("Apavg", 4, "i4"),
		("Cp", 4, "f8"), ("C9", 2, "i4"), ("isn", 4, "i4"),
		("f107_adj", 6, "f8"), ("Q", 2, "i4"),
		("f107_81ctr_adj", 6, "f8"), ("f107_81lst_adj", 6, "f8"),
		("f107_obs", 6, "f8"), ("f107_81ctr_obs", 6, "f8"),
		("f107_81lst_obs", 6, "f8"),
	]
	col_names = [name for name, _, _ in layout]
	col_widths = [width for _, width, _ in layout]
	col_dtypes = ",".join(code for _, _, code in layout)

	raw = np.genfromtxt(
		swpath,
		skip_header=3,
		delimiter=col_widths,
		dtype=col_dtypes,
		names=col_names,
	)
	# Drop the first two and the last parsed rows
	# NOTE(review): presumably non-data marker lines of the file format -- confirm.
	raw = raw[2:-1]
	# Remove rows without a parsable year, genfromtxt's default
	# fill value for missing or invalid integer fields is -1.
	raw = raw[raw["year"] != -1]

	dates = pd.to_datetime([
		"{0:04d}-{1:02d}-{2:02d}".format(*ymd)
		for ymd in raw[["year", "month", "day"]]
	])
	sw_df = pd.DataFrame(raw, index=dates)
	# Adjust Kp to 0...9 (the parsed values are ten times larger)
	kp_cols = ["Kp{0}".format(hh) for hh in range(0, 23, 3)]
	kp_cols.append("Kpsum")
	sw_df[kp_cols] = sw_df[kp_cols] * 0.1
	return sw_df
237
238
239
# Common arguments for the public daily and 3h interfaces,
# inserted into their docstrings by `_doc_param()` below.
_SW_COMMON_PARAMS = """
Parameters
----------
swpath_all: `None` or str, optional, default `None`
	Filename for the large combined index file including the
	historic data, absolute path or relative to the current dir.
	`None` uses the package's default file location.
swpath_5y: `None` or str, optional, default `None`
	Filename for the 5-year index file, absolute path or relative to the current dir.
	`None` uses the package's default file location.
update: bool, optional, default False
	Attempt to update the local data if it is older than `update_interval`.
update_interval: str, optional, default "30days"
	The time after which the data are considered "old".
	By default, no automatic re-download is initiated, set `update` to true.
	The online data is updated every 3 hours, thus setting this value to
	a shorter time is not needed and not recommended.
"""


def _doc_param(**sub):
	"""Decorator factory filling placeholders in docstrings

	Parameters
	----------
	**sub:
		Keyword arguments passed on to :meth:`str.format()` on the
		docstring of the decorated object, each key replaces the
		placeholder of the same name.

	Returns
	-------
	dec: callable
		Decorator that formats the docstring of the object in place
		and returns the same object. Objects without a docstring
		are returned unchanged.
	"""
	def dec(obj):
		# `__doc__` is None when docstrings are stripped (``python -OO``),
		# formatting it would raise AttributeError at decoration time.
		if obj.__doc__ is not None:
			obj.__doc__ = obj.__doc__.format(**sub)
		return obj
	return dec
265
266
267
@_doc_param(params=_SW_COMMON_PARAMS)
def sw_daily(swpath_all=None, swpath_5y=None, update=False, update_interval="30days"):
	"""Combined daily Ap, Kp, and f10.7 index values

	Merges the "historic" data and the data of the last five years
	into a single dataframe.

	All arguments are optional; changing them from the defaults
	should not be necessary and is not recommended.
	{params}
	Returns
	-------
	sw_df: pandas.DataFrame
		The combined parsed space weather data (daily values).
		Raises ``IOError`` if the data files cannot be found.
		The index is returned timezone-naive but contains UTC timestamps.
		To convert to a timezone-aware index, use
		:meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

	See Also
	--------
	ap_kp_3h, read_sw
	"""
	path_all = swpath_all or SW_PATH_ALL
	path_5y = swpath_5y or SW_PATH_5Y

	# Make sure that both files are available locally
	if not (os.path.exists(path_all) and os.path.exists(path_5y)):
		warn("Could not find space weather data, trying to download.")
		update_data(swpath_all=path_all, swpath_5y=path_5y)

	# The large file counts as outdated after four years (1460 = 4 * 365 days),
	# the 5-year file after `update_interval`.
	outdated = (
		get_file_age(path_all) > pd.Timedelta("1460days")
		or get_file_age(path_5y) > pd.Timedelta(update_interval)
	)
	if outdated and update:
		update_data(swpath_all=path_all, swpath_5y=path_5y)
	elif outdated:
		# Only downloads when explicitly asked to, otherwise just notify
		warn(
			"Local data files are older than {0}, pass `update=True` or "
			"run `sw.update_data()` manually if you need newer data.".format(
				update_interval
			)
		)

	hist_df = read_sw(path_all)
	recent_df = read_sw(path_5y)
	# Historic data up to and including the first day of the 5-year data,
	# followed by the 5-year data without its first row.
	first_recent = recent_df.index[0]
	return pd.concat([hist_df.loc[:first_recent], recent_df.iloc[1:]])
318
319
320
@_doc_param(params=_SW_COMMON_PARAMS)
def ap_kp_3h(*args, **kwargs):
	"""3h values of Ap and Kp

	Provides the 3-hourly Ap and Kp indices from the full daily data set.

	Accepts the same arguments as `sw_daily()`.
	All arguments are optional; changing them from the defaults
	should not be necessary and is not recommended.
	{params}
	Returns
	-------
	sw_df: pandas.DataFrame
		The combined Ap and Kp index data (3h values) in the columns
		"Ap" and "Kp", sorted by time.
		The index values are centred at the 3h interval, i.e. at 01:30:00,
		04:30:00, 07:30:00, ... and so on.
		Raises ``IOError`` if the data files cannot be found.
		The index is returned timezone-naive but contains UTC timestamps.
		To convert to a timezone-aware index, use
		:meth:`pandas.DataFrame.tz_localize()`: ``sw_df.tz_localize("utc")``.

	See Also
	--------
	sw_daily
	"""
	daily_df = sw_daily(*args, **kwargs)
	# Start hours of the eight 3h intervals, as used in the column names
	start_hours = range(0, 23, 3)

	def _stack_3h(prefix):
		# Build every shifted series explicitly. Assigning to
		# `df[col].index` only changes a temporary object and is
		# lost when pandas runs with Copy-on-Write.
		return pd.concat([
			pd.Series(
				daily_df["{0}{1}".format(prefix, hh)].values,
				# centre of the interval: start hour + 1.5 h
				index=daily_df.index + pd.Timedelta(hh + 1.5, unit="h"),
			)
			for hh in start_hours
		])

	df = pd.DataFrame({"Ap": _stack_3h("Ap"), "Kp": _stack_3h("Kp")})
	return df.sort_index()
356