Passed
Push — master ( a37eea...fae0ef )
by Stefan
01:17
created

spaceweather.gfz._doc_param()   A

Complexity

Conditions 1

Size

Total Lines 5
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
# Copyright (c) 2020--2024 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices from GFZ Potsdam
9
10
GFZ space weather indices ASCII file parser for python [#]_.
11
12
.. [#] https://kp.gfz-potsdam.de/en/
13
"""
14
import os
15
from pkg_resources import resource_filename
16
import logging
17
from warnings import warn
18
19
import numpy as np
20
import pandas as pd
21
22
from .core import _assert_file_exists, _dl_file
23
24
__all__ = [
25
	"gfz_daily", "gfz_3h", "read_gfz",
26
	"get_gfz_age", "update_gfz",
27
	"GFZ_PATH_ALL", "GFZ_PATH_30D",
28
]
29
30
GFZ_URL_ALL = "https://kp.gfz-potsdam.de/app/files/Kp_ap_Ap_SN_F107_since_1932.txt"
31
GFZ_URL_30D = "https://kp.gfz-potsdam.de/app/files/Kp_ap_Ap_SN_F107_nowcast.txt"
32
GFZ_FILE_ALL = os.path.basename(GFZ_URL_ALL)
33
GFZ_FILE_30D = os.path.basename(GFZ_URL_30D)
34
GFZ_PATH_ALL = resource_filename(__name__, os.path.join("data", GFZ_FILE_ALL))
35
GFZ_PATH_30D = resource_filename(__name__, os.path.join("data", GFZ_FILE_30D))
36
37
38
def get_gfz_age(gfzpath, relative=True):
39
	"""Age of the downloaded data file
40
41
	Retrieves the last update time of the given file or full path.
42
43
	Parameters
44
	----------
45
	gfzpath: str
46
		Filename to check, absolute path or relative to the current dir.
47
	relative: bool, optional, default True
48
		Return the file's age (True) or the last update time (False).
49
50
	Returns
51
	-------
52
	upd: pandas.Timestamp or pandas.Timedelta
53
		The last updated time or the file age, depending on the setting
54
		of `relative` above.
55
		Raises ``IOError`` if the file is not found.
56
	"""
57
	_assert_file_exists(gfzpath)
58
	with open(gfzpath) as fp:
59
		for line in fp:
60
			# forward to last line
61
			pass
62
	upd = pd.to_datetime(line[:10].replace(" ", "-"), utc=True)
0 ignored issues
show
introduced by
The variable line does not seem to be defined in case the for loop on line 59 is not entered. Are you sure this can never be the case?
Loading history...
63
	if relative:
64
		return pd.Timestamp.utcnow() - upd
65
	return upd
66
67
68
def update_gfz(
69
	min_age="1d",
70
	gfzpath_all=GFZ_PATH_ALL, gfzpath_30d=GFZ_PATH_30D,
71
	url_all=GFZ_URL_ALL, url_30d=GFZ_URL_30D,
72
):
73
	"""Update the local space weather index data
74
75
	Updates the local space weather index data from the website [#]_,
76
	given that the 30-day file is older than `min_age`,
77
	or the combined (large) file is older than 30 days.
78
	If the data is missing for some reason, a download will be attempted nonetheless.
79
80
	All arguments are optional and changing them from the defaults should
81
	neither be necessary nor is it recommended.
82
83
	.. [#] https://kp.gfz-potsdam.de/en/
84
85
	Parameters
86
	----------
87
	min_age: str, optional, default "1d"
88
		The time after which a new download will be attempted.
89
		The online data is updated every day, thus setting this value to
90
		a shorter time is not needed and not recommended.
91
	gfzpath_all: str, optional, default depending on package install location
92
		Filename for the large combined index file including the
93
		historic data, absolute path or relative to the current dir.
94
	gfzpath_30d: str, optional, default depending on package install location
95
		Filename for the 30-day (nowcast) index file, absolute path or relative
96
		to the current dir.
97
	url_all: str, optional, default `gfz.GFZ_URL_ALL`
98
		The url of the "historic" data file.
99
	url_30d: str, optional, default `gfz.GFZ_URL_30D`
100
		The url of the data file containing the indices for the last 30 days.
101
102
	Returns
103
	-------
104
	Nothing.
105
	"""
106
	def _update_file(gfzpath, url, min_age):
107
		if not os.path.exists(gfzpath):
108
			logging.info("{0} not found, downloading.".format(gfzpath))
109
			_dl_file(gfzpath, url)
110
			return
111
		if get_gfz_age(gfzpath) < pd.Timedelta(min_age):
112
			logging.info("not updating '{0}'.".format(gfzpath))
113
			return
114
		logging.info("updating '{0}'.".format(gfzpath))
115
		_dl_file(gfzpath, url)
116
117
	# Update the large file after 30 days
118
	_update_file(gfzpath_all, url_all, "30days")
119
	# Don't re-download before `min_age` has passed (1d)
120
	_update_file(gfzpath_30d, url_30d, min_age)
121
122
123
def read_gfz(gfzpath):
124
	"""Read and parse space weather index data file
125
126
	Reads the given file and parses it according to the space weather data format.
127
128
	Parameters
129
	----------
130
	gfzpath: str
131
		File to parse, absolute path or relative to the current dir.
132
133
	Returns
134
	-------
135
	gfz_df: pandas.DataFrame
136
		The parsed space weather data (daily values).
137
		Raises an ``IOError`` if the file is not found.
138
139
		The dataframe contains the following columns:
140
141
		"year", "month", "day":
142
			The observation date
143
		"bsrn":
144
			Bartels Solar Rotation Number.
145
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
146
		"rotd":
147
			Number of Day within the Bartels 27-day cycle (01-27).
148
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
149
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
150
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
151
		"Kpsum": Sum of the 8 Kp indices for the day.
152
			Expressed to the nearest third of a unit.
153
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
154
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
155
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
156
		"Apavg":
157
			Arithmetic average of the 8 Ap indices for the day.
158
		"isn":
159
			International Sunspot Number.
160
			Records contain the Zurich number through 1980 Dec 31 and the
161
			International Brussels number thereafter.
162
		"f107_obs":
163
			Observed (unadjusted) value of F10.7.
164
		"f107_adj":
165
			10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU.
166
			Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until
167
			1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on.
168
			Expressed in units of 10-22 W/m2/Hz.
169
		"D":
170
			Definitive indicator.
171
			0: Kp and SN preliminary
172
			1: Kp definitive, SN preliminary
173
			2: Kp and SN definitive
174
	"""
175
	_assert_file_exists(gfzpath)
176
	gfz = np.genfromtxt(
177
		gfzpath,
178
		skip_header=3,
179
		delimiter=[
180
		#  yy mm dd dd dm br db kp kp kp kp kp kp kp kp
181
			4, 3, 3, 6, 8, 5, 3, 7, 7, 7, 7, 7, 7, 7, 7,
182
		#  ap ap ap ap ap ap ap ap Ap sn f1 f2 def
183
			5, 5, 5, 5, 5, 5, 5, 5, 6, 4, 9, 9, 2,
184
		],
185
		dtype=(
186
			"i4,i4,i4,i4,f4,i4,i4,f4,f4,f4,f4,f4,f4,f4,"
187
			"f4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,f8,i4,"
188
		),
189
		names=[
190
			"year", "month", "day", "days", "days_m", "bsrn", "rotd",
191
			"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21",
192
			"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg",
193
			"isn", "f107_obs", "f107_adj", "D",
194
		]
195
	)
196
	gfz = gfz[gfz["year"] != -1]
197
	ts = pd.to_datetime([
198
		"{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd)
199
		for yy, mm, dd in gfz[["year", "month", "day"]]
200
	])
201
	gfz_df = pd.DataFrame(gfz, index=ts)
202
	# Sum Kp for compatibility with celestrak dataframe
203
	kpns = list(map("Kp{0}".format, range(0, 23, 3)))
204
	gfz_df.insert(15, "Kpsum", gfz_df[kpns].sum(axis=1))
205
	return gfz_df
206
207
208
def read_gfz_wdc(gfzpath):
209
	"""Parse space weather index data file in WDC format
210
211
	Parses the GFZ index data in WDC format.
212
213
	Parameters
214
	----------
215
	gfzpath: str
216
		File to parse, absolute path or relative to the current dir.
217
218
	Returns
219
	-------
220
	gfz_df: pandas.DataFrame
221
		The parsed space weather data (daily values).
222
		Raises an ``IOError`` if the file is not found.
223
224
		The dataframe contains the following columns:
225
226
		"year", "month", "day":
227
			The observation date
228
		"bsrn":
229
			Bartels Solar Rotation Number.
230
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
231
		"rotd":
232
			Number of Day within the Bartels 27-day cycle (01-27).
233
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
234
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
235
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
236
		"Kpsum": Sum of the 8 Kp indices for the day.
237
			Expressed to the nearest third of a unit.
238
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
239
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
240
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
241
		"Apavg":
242
			Arithmetic average of the 8 Ap indices for the day.
243
		"Cp":
244
			Cp index - the daily planetary character figure, a qualitative
245
			estimate of the overall level of geomagnetic activity for this day
246
			determined from the sum of the eight ap amplitudes,
247
			ranging from 0.0 to 2.5 in steps of 0.1.
248
		"C9":
249
			The contracted scale for Cp with only 1 digit, from 0 to 9.
250
	"""
251
	_assert_file_exists(gfzpath)
252
	gfz = np.genfromtxt(
253
		gfzpath,
254
		skip_header=3,
255
		delimiter=[
256
		#  yy mm dd br db kp kp kp kp kp kp kp kp kps
257
			2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
258
		#  ap ap ap ap ap ap ap ap Ap Cp C9
259
			3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1,
260
		],
261
		dtype=(
262
			"i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,"
263
			"i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,i4,"
264
		),
265
		names=[
266
			"year", "month", "day", "bsrn", "rotd",
267
			"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21", "Kpsum",
268
			"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21", "Apavg",
269
			"Cp", "C9",
270
		]
271
	)
272
	gfz = gfz[gfz["year"] != -1]
273
	ts = pd.to_datetime([
274
		"{0:04d}-{1:02d}-{2:02d}".format(2000 + yy if yy < 32 else 1900 + yy, mm, dd)
275
		for yy, mm, dd in gfz[["year", "month", "day"]]
276
	])
277
	gfz_df = pd.DataFrame(gfz, index=ts)
278
	gfz_df.loc[:, "year"] = ts.year
279
	# Adjust Kp to 0...9
280
	kpns = list(map("Kp{0}".format, range(0, 23, 3))) + ["Kpsum"]
281
	gfz_df[kpns] = 0.1 * gfz_df[kpns]
282
	return gfz_df
283
284
285
# Common arguments for the public daily and 3h interfaces
286
_GFZ_COMMON_PARAMS = """
287
	Parameters
288
	----------
289
	gfzpath_all: str, optional, default depending on package install location
290
		Filename for the large combined index file including the
291
		historic data, absolute path or relative to the current dir.
292
	gfzpath_30d: str, optional, default depending on package install location
293
		Filename for the 30-day (nowcast) index file,
294
		absolute path or relative to the current dir.
295
	update: bool, optional, default False
296
		Attempt to update the local data if it is older than `update_interval`.
297
	update_interval: str, optional, default "30days"
298
		The time after which the data are considered "old".
299
		By default, no automatic re-download is initiated, set `update` to true.
300
		The online data is updated every 3 hours, thus setting this value to
301
		a shorter time is not needed and not recommended.
302
"""
303
304
305
def _doc_param(**sub):
306
	def dec(obj):
307
		obj.__doc__ = obj.__doc__.format(**sub)
308
		return obj
309
	return dec
310
311
312
@_doc_param(params=_GFZ_COMMON_PARAMS)
313
def gfz_daily(gfzpath_all=GFZ_PATH_ALL, gfzpath_30d=GFZ_PATH_30D, update=False, update_interval="10days"):
314
	"""Combined daily Ap, Kp, and f10.7 index values
315
316
	Combines the "historic" and last-30-day data into one dataframe.
317
318
	All arguments are optional and changing them from the defaults should not
319
	be required neither should it be necessary nor is it recommended.
320
	{params}
321
	Returns
322
	-------
323
	gfz_df: pandas.DataFrame
324
		The combined parsed space weather data (daily values).
325
		Raises ``IOError`` if the data files cannot be found.
326
327
	See Also
328
	--------
329
	gfz_3h, read_gfz
330
	"""
331
	# ensure that the file exists and is up to date
332
	if (
333
		not os.path.exists(gfzpath_all)
334
		or not os.path.exists(gfzpath_30d)
335
	):
336
		warn("Could not find space weather data, trying to download.")
337
		update_gfz(gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d)
338
339
	if (
340
		get_gfz_age(gfzpath_all) > pd.Timedelta("30days")
341
		or get_gfz_age(gfzpath_30d) > pd.Timedelta(update_interval)
342
	):
343
		if update:
344
			update_gfz(gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d)
345
		else:
346
			warn(
347
				"Local data files are older than {0}, pass `update=True` or "
348
				"run `gfz.update_gfz()` manually if you need newer data.".format(
349
					update_interval
350
				)
351
			)
352
353
	df_all = read_gfz(gfzpath_all)
354
	df_30d = read_gfz(gfzpath_30d)
355
	return pd.concat([df_all[:df_30d.index[0]], df_30d[1:]])
356
357
358
@_doc_param(params=_GFZ_COMMON_PARAMS)
359
def gfz_3h(*args, **kwargs):
360
	"""3h values of Ap and Kp
361
362
	Provides the 3-hourly Ap and Kp indices from the full daily data set.
363
364
	Accepts the same arguments as `gfz_daily()`.
365
	All arguments are optional and changing them from the defaults should not
366
	be required neither should it be necessary nor is it recommended.
367
	{params}
368
	Returns
369
	-------
370
	gfz_df: pandas.DataFrame
371
		The combined Ap and Kp index data (3h values).
372
		The index values are centred at the 3h interval, i.e. at 01:30:00,
373
		04:30:00, 07:30:00, ... and so on.
374
		Raises ``IOError`` if the data files cannot be found.
375
376
	See Also
377
	--------
378
	gfz_daily
379
	"""
380
	daily_df = gfz_daily(*args, **kwargs)
381
	ret = daily_df.copy()
382
	apns = list(map("Ap{0}".format, range(0, 23, 3)))
383
	kpns = list(map("Kp{0}".format, range(0, 23, 3)))
384
	for i, (ap, kp) in enumerate(zip(apns, kpns)):
385
		ret[ap].index = daily_df[ap].index + pd.Timedelta((i * 3 + 1.5), unit="h")
386
		ret[kp].index = daily_df[kp].index + pd.Timedelta((i * 3 + 1.5), unit="h")
387
	gfz_ap = pd.concat(map(ret.__getitem__, apns))
388
	gfz_kp = pd.concat(map(ret.__getitem__, kpns))
389
	df = pd.DataFrame({"Ap": gfz_ap, "Kp": gfz_kp})
390
	return df.reindex(df.index.sort_values())
391