Passed
Push — master ( dc22b3...70b10f )
by Stefan
01:26
created

spaceweather.core.sw_daily()   B

Complexity

Conditions 6

Size

Total Lines 35
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 16
nop 4
dl 0
loc 35
rs 8.6666
c 0
b 0
f 0
1
# Copyright (c) 2020 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices
9
10
"""
11
import os
12
from pkg_resources import resource_filename
13
import requests
14
import logging
15
from warnings import warn
16
17
import numpy as np
18
import pandas as pd
19
20
# Public names of this module
__all__ = [
	"sw_daily", "ap_kp_3h", "read_sw",
	"get_file_age", "update_data",
	"SW_PATH_ALL", "SW_PATH_5Y",
]

# Download addresses of the complete and of the last-5-years index files
DL_URL_ALL = "https://celestrak.com/SpaceData/SW-All.txt"
DL_URL_5Y = "https://celestrak.com/SpaceData/SW-Last5Years.txt"

# The local file names are the last component of the download addresses
SW_FILE_ALL = os.path.basename(DL_URL_ALL)
SW_FILE_5Y = os.path.basename(DL_URL_5Y)

# Default locations of the local copies in the package's "data" directory
SW_PATH_ALL = resource_filename(__name__, os.path.join("data", SW_FILE_ALL))
SW_PATH_5Y = resource_filename(__name__, os.path.join("data", SW_FILE_5Y))
32
33
34
def _dl_file(swpath, url=DL_URL_ALL):
	"""Download `url` and store the response body in `swpath`

	Parameters
	----------
	swpath: str
		Target filename, absolute path or relative to the current dir.
	url: str, optional, default `DL_URL_ALL`
		The address to download the data from.

	Raises
	------
	requests.HTTPError
		If the server answers with an error status (4xx or 5xx).
	"""
	with requests.get(url, stream=True) as r:
		# Check the status before opening (and thereby truncating) `swpath`,
		# otherwise the body of an error page would replace the data file.
		r.raise_for_status()
		with open(swpath, 'wb') as fd:
			for chunk in r.iter_content(chunk_size=1024):
				fd.write(chunk)
39
40
41
def get_file_age(swpath, relative=True):
	"""Age of the downloaded data file

	Retrieves the last update time of the given file or full path
	from the first line in the file that starts with "UPDATED".

	Parameters
	----------
	swpath: str
		Filename to check, absolute path or relative to the current dir.
	relative: bool, optional, default True
		Return the file's age (True) or the last update time (False).

	Returns
	-------
	upd: pd.Timestamp or pd.Timedelta
		The last updated time or the file age, depending on the setting
		of `relative` above.

	Raises
	------
	ValueError
		If the file contains no line starting with "UPDATED".
	"""
	tag = "UPDATED"
	stamp = None
	# The context manager closes the file also when the loop is left early.
	with open(swpath) as fd:
		for line in fd:
			if line.startswith(tag):
				# Cut off the tag as a prefix; `str.lstrip()` would treat
				# its argument as a set of characters to remove.
				stamp = line[len(tag):].strip()
				break
	if stamp is None:
		# Empty file or no update line: fail explicitly instead of parsing
		# an unrelated (or undefined) line.
		raise ValueError("no 'UPDATED' line found in '{0}'".format(swpath))
	upd = pd.to_datetime(stamp, utc=True)
	if relative:
		# Timezone-aware "now", to match the UTC-aware update time
		return pd.Timestamp.now(tz="UTC") - upd
	return upd
67
68
69
def update_data(
	min_age="3h",
	swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y,
	url_all=DL_URL_ALL, url_5y=DL_URL_5Y,
):
	"""Update the local space weather index data

	Downloads the index files from <https://celestrak.com/SpaceData/>
	if the local 5-year file is at least `min_age` old, or if the
	combined (large) file is at least four years (1460 days) old.
	A file that does not exist locally is always downloaded.

	All arguments are optional; changing them from the defaults is
	neither necessary nor recommended.

	Parameters
	----------
	min_age: str, optional, default "3h"
		The time after which a new download will be attempted.
		The online data is updated every 3 hours, thus setting this value to
		a shorter time is not needed and not recommended.
	swpath_all: str, optional, default depending on package install location
		Filename for the large combined index file including the
		historic data, absolute path or relative to the current dir.
	swpath_5y: str, optional, default depending on package install location
		Filename for the 5-year index file, absolute path or relative to the current dir.
	url_all: str, optional, default `sw.DL_URL_ALL`
		The url of the "historic" data file.
	url_5y: str, optional, default `sw.DL_URL_5Y`
		The url of the data file containing the indices of the last 5 years.

	Returns
	-------
	Nothing.
	"""
	def _refresh(path, source, max_age):
		# Download `source` to `path` unless a recent enough copy exists.
		if os.path.exists(path):
			if get_file_age(path) < pd.Timedelta(max_age):
				logging.info("not updating '{0}'.".format(path))
				return
			logging.info("updating '{0}'.".format(path))
		else:
			logging.info("{0} not found, downloading.".format(path))
		_dl_file(path, source)

	jobs = (
		# The large file is renewed after four years (1460 = 4 * 365 days)
		# to keep some overlap with the 5-year data.
		(swpath_all, url_all, "1460days"),
		# The 5-year file is not downloaded again before `min_age` has passed.
		(swpath_5y, url_5y, min_age),
	)
	for path, source, max_age in jobs:
		_refresh(path, source, max_age)
121
122
123
def read_sw(swpath):
	"""Read and parse space weather index data file

	Reads the given fixed-width file, skipping the first three lines,
	and returns its records as a date-indexed data frame.

	Parameters
	----------
	swpath: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	sw_df: pd.DataFrame
		The parsed space weather data (daily values), indexed by date.
		The "Kp*" columns (including "Kpsum") are multiplied by 0.1.
	"""
	hours = range(0, 23, 3)
	kp_cols = ["Kp{0}".format(h) for h in hours] + ["Kpsum"]
	ap_cols = ["Ap{0}".format(h) for h in hours] + ["Apavg"]
	columns = (
		["year", "month", "day", "bsrn", "rotd"]
		+ kp_cols
		+ ap_cols
		+ [
			"Cp", "C9", "isn", "f107_adj", "Q", "f107_81ctr_adj", "f107_81lst_adj",
			"f107_obs", "f107_81ctr_obs", "f107_81lst_obs",
		]
	)
	# Field widths in characters, in the order of `columns`:
	# date and rotation fields, 8 x Kp, (Kpsum, 8 x Ap, Apavg, Cp),
	# (C9, isn, f107_adj, Q), and the five remaining f10.7 fields.
	widths = [4, 3, 3, 5, 3] + 8 * [3] + 11 * [4] + [2, 4, 6, 2] + 5 * [6]
	# Integer fields up to "Apavg", then Cp (float), C9, isn, f107_adj (float),
	# Q, and five float fields.
	kinds = 23 * ["i4"] + ["f8", "i4", "i4", "f8", "i4"] + 5 * ["f8"]
	records = np.genfromtxt(
		swpath,
		skip_header=3,
		delimiter=widths,
		dtype=",".join(kinds),
		names=columns,
	)
	# Drop the first two and the last parsed line
	# (presumably section marker lines, confirm against the file format).
	records = records[2:-1]
	# Integer fields that could not be parsed are filled with -1
	records = records[records["year"] != -1]
	dates = [
		"{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd)
		for yy, mm, dd in zip(records["year"], records["month"], records["day"])
	]
	sw_df = pd.DataFrame(records, index=pd.to_datetime(dates))
	sw_df[kp_cols] = 0.1 * sw_df[kp_cols]
	return sw_df
161
162
163
# Common arguments for the public daily and 3h interfaces
164
_SW_COMMON_PARAMS = """
165
	Parameters
166
	----------
167
	swpath_all: str, optional, default depending on package install location
168
		Filename for the large combined index file including the
169
		historic data, absolute path or relative to the current dir.
170
	swpath_5y: str, optional, default depending on package install location
171
		Filename for the 5-year index file, absolute path or relative to the current dir.
172
	update: bool, optional, default False
173
		Attempt to update the local data if it is older than `update_interval`.
174
	update_interval: str, optional, default "30days"
175
		The time after which the data are considered "old".
176
		By default, no automatic re-download is initiated, set `update` to true.
177
		The online data is updated every 3 hours, thus setting this value to
178
		a shorter time is not needed and not recommended.
179
"""
180
181
182
def _doc_param(**sub):
183
	def dec(obj):
184
		obj.__doc__ = obj.__doc__.format(**sub)
185
		return obj
186
	return dec
187
188
189
@_doc_param(params=_SW_COMMON_PARAMS)
def sw_daily(swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, update=False, update_interval="30days"):
	"""Combined daily Ap, Kp, and f10.7 index values

	Combines the "historic" and last-5-year data into one dataframe.
	Missing files are downloaded to the given paths first.

	All arguments are optional; changing them from the defaults is
	neither necessary nor recommended.
	{params}
	Returns
	-------
	sw_df: pd.DataFrame
		The combined parsed space weather data (daily values).
	"""
	# ensure that the files exist at the requested locations
	if (
		not os.path.exists(swpath_all)
		or not os.path.exists(swpath_5y)
	):
		warn("Could not find space weather data, trying to download.")
		# Pass the paths on, otherwise only the default locations are filled
		# and reading from custom paths fails afterwards.
		update_data(swpath_all=swpath_all, swpath_5y=swpath_5y)

	if (
		# 1460 = 4 * 365
		get_file_age(swpath_all) > pd.Timedelta("1460days")
		or get_file_age(swpath_5y) > pd.Timedelta(update_interval)
	):
		if update:
			# Refresh the files that are read below, using the
			# same age limit that triggered this branch.
			update_data(
				min_age=update_interval,
				swpath_all=swpath_all, swpath_5y=swpath_5y,
			)
		else:
			warn("Data files *might* be too old, consider running `sw.update_data()`.")

	df_all = read_sw(swpath_all)
	df_5y = read_sw(swpath_5y)
	# The label slice includes the first day of the 5-year data,
	# so that row is dropped from the 5-year frame to avoid a duplicate.
	return pd.concat([df_all[:df_5y.index[0]], df_5y[1:]])
224
225
226
@_doc_param(params=_SW_COMMON_PARAMS)
def ap_kp_3h(*args, **kwargs):
	"""Extract 3h values of Ap and Kp

	Extracts 3-hourly Ap and Kp indices from the full daily data set.
	Each of the eight 3-hour columns of a day is placed 1.5 hours
	after the hour in its column name.

	Accepts the same arguments as `sw_daily()`.
	All arguments are optional; changing them from the defaults is
	neither necessary nor recommended.
	{params}
	Returns
	-------
	sw_df: pd.DataFrame
		The combined Ap and Kp index data (3h values).

	See Also
	--------
	`sw_daily()`
	"""
	daily_df = sw_daily(*args, **kwargs)
	ap_parts = []
	kp_parts = []
	for i, hour in enumerate(range(0, 23, 3)):
		shifted = daily_df.index + pd.Timedelta((i * 3 + 1.5), unit="h")
		# Build new series with the shifted index instead of assigning to
		# the index of `df[col]`: that only sticks if pandas hands out the
		# same cached object again, which is not guaranteed (copy-on-write).
		ap_parts.append(pd.Series(daily_df["Ap{0}".format(hour)].values, index=shifted))
		kp_parts.append(pd.Series(daily_df["Kp{0}".format(hour)].values, index=shifted))
	sw_ap = pd.concat(ap_parts)
	sw_kp = pd.concat(kp_parts)
	return pd.DataFrame({"Ap": sw_ap, "Kp": sw_kp})
255