Passed
Push — master ( 56563c...5f6eec )
by Stefan
02:03
created

spaceweather.gfz.update_gfz_hp30()   A

Complexity

Conditions 1

Size

Total Lines 15
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 8
nop 5
dl 0
loc 15
rs 10
c 0
b 0
f 0
1
# Copyright (c) 2020--2024 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices from GFZ Potsdam
9
10
GFZ space weather indices ASCII file parser for python [#]_.
11
Includes parser for the GFZ ASCII files, files in WDC format,
12
and for the Hpo 30 and 60 minute ASCII files [#]_.
13
For the file formats, see the `gfz_xxx_format.txt` files.
14
15
.. [#] https://kp.gfz-potsdam.de/en/
16
.. [#] https://kp.gfz-potsdam.de/en/hp30-hp60
17
"""
18
import os
19
import logging
20
from warnings import warn
21
22
import numpy as np
23
import pandas as pd
24
25
from .core import _assert_file_exists, _dl_file, _resource_filepath
26
27
# Names exported by `from <module> import *`.
# NOTE(review): `read_gfz_wdc` is defined in this module but not listed
# here -- confirm that this is intentional.
__all__ = [
	"gfz_daily", "gfz_3h", "read_gfz",
	"read_gfz_hp",
	"get_gfz_age", "update_gfz",
	"update_gfz_hp30", "update_gfz_hp60",
	"GFZ_PATH_ALL", "GFZ_PATH_30D",
	"HP30_PATH_ALL", "HP30_PATH_30D",
	"HP60_PATH_ALL", "HP60_PATH_30D",
]

# Daily Kp/ap/Ap/SN/F10.7 files: download URLs, bare file names and local paths.
# "ALL" is the complete series, "30D" is the nowcast file.
# `_resource_filepath` comes from `.core`; presumably it resolves the file
# name to the package's data directory -- TODO confirm.
GFZ_URL_ALL = "https://kp.gfz-potsdam.de/app/files/Kp_ap_Ap_SN_F107_since_1932.txt"
GFZ_URL_30D = "https://kp.gfz-potsdam.de/app/files/Kp_ap_Ap_SN_F107_nowcast.txt"
GFZ_FILE_ALL = os.path.basename(GFZ_URL_ALL)
GFZ_FILE_30D = os.path.basename(GFZ_URL_30D)
GFZ_PATH_ALL = _resource_filepath(GFZ_FILE_ALL)
GFZ_PATH_30D = _resource_filepath(GFZ_FILE_30D)

# Hp30/ap30 files, same layout of constants as above.
# NOTE(review): these URLs use the host "kp.gfz.de" whereas the daily files
# above use "kp.gfz-potsdam.de" -- verify that both hosts are still served.
HP30_URL_ALL = "https://kp.gfz.de/app/files/Hp30_ap30_complete_series.txt"
HP30_URL_30D = "https://kp.gfz.de/app/files/Hp30_ap30_nowcast.txt"
HP30_FILE_ALL = os.path.basename(HP30_URL_ALL)
HP30_FILE_30D = os.path.basename(HP30_URL_30D)
HP30_PATH_ALL = _resource_filepath(HP30_FILE_ALL)
HP30_PATH_30D = _resource_filepath(HP30_FILE_30D)

# Hp60/ap60 files, same layout of constants as above.
HP60_URL_ALL = "https://kp.gfz.de/app/files/Hp60_ap60_complete_series.txt"
HP60_URL_30D = "https://kp.gfz.de/app/files/Hp60_ap60_nowcast.txt"
HP60_FILE_ALL = os.path.basename(HP60_URL_ALL)
HP60_FILE_30D = os.path.basename(HP60_URL_30D)
HP60_PATH_ALL = _resource_filepath(HP60_FILE_ALL)
HP60_PATH_30D = _resource_filepath(HP60_FILE_30D)
57
58
59
def get_gfz_age(gfzpath, relative=True):
	"""Age of the downloaded data file

	Determines the last update time of the given file from the date
	in the first ten characters ("YYYY MM DD") of its last non-blank line.

	Parameters
	----------
	gfzpath: str
		Filename to check, absolute path or relative to the current dir.
	relative: bool, optional, default True
		Return the file's age (True) or the last update time (False).

	Returns
	-------
	upd: pandas.Timestamp or pandas.Timedelta
		The last updated time (UTC) or the file age, depending on the setting
		of `relative` above.

	Raises
	------
	IOError
		If the file is not found (checked by `_assert_file_exists()`).
	ValueError
		If the file contains no (non-blank) lines to take the date from.
	"""
	_assert_file_exists(gfzpath)
	last_line = None
	with open(gfzpath) as fp:
		for line in fp:
			# Remember the last line that carries any content; trailing
			# blank lines must not hide the last data record.
			if line.strip():
				last_line = line
	if last_line is None:
		# Previously this surfaced as an unbound-variable error.
		raise ValueError(
			"'{0}' is empty, cannot determine its age.".format(gfzpath)
		)
	upd = pd.to_datetime(last_line[:10].replace(" ", "-"), utc=True)
	if relative:
		# Timezone-aware "now", to match the UTC-aware `upd`.
		return pd.Timestamp.now(tz="UTC") - upd
	return upd
87
88
89
def update_gfz(
	min_age="1d",
	gfzpath_all=GFZ_PATH_ALL, gfzpath_30d=GFZ_PATH_30D,
	url_all=GFZ_URL_ALL, url_30d=GFZ_URL_30D,
):
	"""Update the local space weather index data

	Downloads the index files from the website [#]_ again if the
	30-day file is older than `min_age`, or if the combined (large) file
	is older than 30 days.
	A file that does not exist locally is always downloaded.

	All arguments are optional and changing them from the defaults should
	neither be necessary nor is it recommended.

	.. [#] https://kp.gfz-potsdam.de/en/

	Parameters
	----------
	min_age: str, optional, default "1d"
		The time after which a new download will be attempted.
		The online data is updated every day, thus setting this value to
		a shorter time is not needed and not recommended.
	gfzpath_all: str, optional, default depending on package install location
		Filename for the large combined index file including the
		historic data, absolute path or relative to the current dir.
	gfzpath_30d: str, optional, default depending on package install location
		Filename for the 30-day (nowcast) index file, absolute path or relative
		to the current dir.
	url_all: str, optional, default `gfz.GFZ_URL_ALL`
		The url of the "historic" data file.
	url_30d: str, optional, default `gfz.GFZ_URL_30D`
		The url of the data file containing the indices for the last 30 days.

	Returns
	-------
	None
	"""
	def _refresh(target, source, max_age):
		# Download `source` to `target` unless a fresh enough copy exists.
		if os.path.exists(target):
			if get_gfz_age(target) < pd.Timedelta(max_age):
				logging.info("not updating '{0}'.".format(target))
				return
			logging.info("updating '{0}'.".format(target))
		else:
			logging.info("{0} not found, downloading.".format(target))
		_dl_file(target, source)

	jobs = (
		# The large file is only refreshed after 30 days.
		(gfzpath_all, url_all, "30days"),
		# The nowcast file is not re-downloaded before `min_age` has passed.
		(gfzpath_30d, url_30d, min_age),
	)
	for target, source, max_age in jobs:
		_refresh(target, source, max_age)
142
143
144
def update_gfz_hp30(
	min_age="1d",
	gfzpath_all=HP30_PATH_ALL, gfzpath_30d=HP30_PATH_30D,
	url_all=HP30_URL_ALL, url_30d=HP30_URL_30D,
):
	"""Updates the local Hp30 index data

	Forwards all arguments unchanged to `update_gfz()`; only the defaults
	differ, they point to the Hp30 files and urls.

	See Also
	--------
	update_gfz
	"""
	hp30_files = dict(
		gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d,
		url_all=url_all, url_30d=url_30d,
	)
	return update_gfz(min_age=min_age, **hp30_files)
160
161
162
def update_gfz_hp60(
	min_age="1d",
	gfzpath_all=HP60_PATH_ALL, gfzpath_30d=HP60_PATH_30D,
	url_all=HP60_URL_ALL, url_30d=HP60_URL_30D,
):
	"""Updates the local Hp60 index data

	Forwards all arguments unchanged to `update_gfz()`; only the defaults
	differ, they point to the Hp60 files and urls.

	See Also
	--------
	update_gfz
	"""
	hp60_files = dict(
		gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d,
		url_all=url_all, url_30d=url_30d,
	)
	return update_gfz(min_age=min_age, **hp60_files)
178
179
180
def read_gfz(gfzpath):
	"""Read and parse space weather index data file

	Parses the fixed-width GFZ ASCII file into a dataframe with one row
	per day, indexed by the observation date.

	Parameters
	----------
	gfzpath: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	gfz_df: pandas.DataFrame
		The parsed space weather data (daily values).
		Raises an ``IOError`` if the file is not found.

		The dataframe contains the following columns:

		"year", "month", "day":
			The observation date
		"days", "days_m":
			Day counters as given in the file, see the file format description.
		"bsrn":
			Bartels Solar Rotation Number.
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
		"rotd":
			Number of Day within the Bartels 27-day cycle (01-27).
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
		"Kpsum": Sum of the 8 Kp indices for the day.
			Expressed to the nearest third of a unit.
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
		"Apavg":
			Arithmetic average of the 8 Ap indices for the day.
		"isn":
			International Sunspot Number.
			Records contain the Zurich number through 1980 Dec 31 and the
			International Brussels number thereafter.
		"f107_obs":
			Observed (unadjusted) value of F10.7.
		"f107_adj":
			10.7-cm Solar Radio Flux (F10.7) Adjusted to 1 AU.
			Measured at Ottawa at 1700 UT daily from 1947 Feb 14 until
			1991 May 31 and measured at Penticton at 2000 UT from 1991 Jun 01 on.
			Expressed in units of 10-22 W/m2/Hz.
		"D":
			Definitive indicator.
			0: Kp and SN preliminary
			1: Kp definitive, SN preliminary
			2: Kp and SN definitive
	"""
	_assert_file_exists(gfzpath)

	# Fixed column widths of a data line, in file order.
	widths = [
		# yy mm dd dd dm br db kp kp kp kp kp kp kp kp
		4, 3, 3, 6, 8, 5, 3, 7, 7, 7, 7, 7, 7, 7, 7,
		# ap ap ap ap ap ap ap ap Ap sn f1 f2 def
		5, 5, 5, 5, 5, 5, 5, 5, 6, 4, 9, 9, 2,
	]
	field_types = (
		"i4,i4,i4,i4,f4,i4,i4,f4,f4,f4,f4,f4,f4,f4,"
		"f4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,f8,i4,"
	)
	kp_cols = ["Kp{0}".format(hour) for hour in range(0, 23, 3)]
	ap_cols = ["Ap{0}".format(hour) for hour in range(0, 23, 3)]
	columns = (
		["year", "month", "day", "days", "days_m", "bsrn", "rotd"]
		+ kp_cols
		+ ap_cols
		+ ["Apavg", "isn", "f107_obs", "f107_adj", "D"]
	)

	records = np.genfromtxt(
		gfzpath,
		skip_header=3,
		delimiter=widths,
		dtype=field_types,
		names=columns,
	)
	# Drop the records whose year field is the -1 placeholder.
	records = records[records["year"] != -1]

	dates = pd.to_datetime([
		"{0:04d}-{1:02d}-{2:02d}".format(year, month, day)
		for year, month, day in records[["year", "month", "day"]]
	])
	gfz_df = pd.DataFrame(records, index=dates)

	# Sum Kp for compatibility with celestrak dataframe,
	# placed directly behind the last Kp column.
	kp_total = gfz_df[kp_cols].sum(axis=1)
	gfz_df.insert(gfz_df.columns.get_loc("Kp21") + 1, "Kpsum", kp_total)
	return gfz_df
263
264
265
def read_gfz_hp(gfzhppath):
	"""Read and parse GFZ Hp30 and Hp60 index data file

	Parses the fixed-width Hp30 or Hp60 ASCII file into a dataframe with
	one row per interval.
	File format descriptions in [#]_ and [#]_

	.. [#] https://kp.gfz-potsdam.de/app/format/Hpo_Hp30.txt
	.. [#] https://kp.gfz-potsdam.de/app/format/Hpo_Hp60.txt

	Parameters
	----------
	gfzhppath: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	hp_df: pandas.DataFrame
		The parsed space weather data (one row per 30 min or 60 min interval).
		Raises an ``IOError`` if the file is not found.

		The dataframe contains the following columns:

		"index":
			pandas.DatetimeIndex of the middle times of the intervals.
		"year", "month", "day":
			The observation date.
		"hh_h":
			Starting time in hours of interval.
		"hh_m":
			Middle time in hours of interval.
		"days":
			Days since 1932-01-01 00:00 UT to start of interval.
		"days_m":
			Days since 1932-01-01 00:00 UT to middle of interval.
		"Hp":
			Hp index during the interval (30 min or 60 min).
		"ap":
			ap index during the interval (30 min or 60 min).
		"D":
			Reserved for future use, D = 0 for now.
	"""
	_assert_file_exists(gfzhppath)

	# Fixed column widths of a data line, in file order.
	widths = [
		# yy mm dd hh hm ddd ddm hp ap  D
		4, 3, 3, 5, 6, 12, 12, 7, 5, 2,
	]
	# NOTE(review): "days" and "days_m" are read as single precision (f4),
	# which limits the resolution of the fractional day -- confirm that
	# this is acceptable.
	field_types = "i4,i4,i4,f4,f4,f4,f4,f4,i4,i4"
	columns = [
		"year", "month", "day", "hh_h", "hh_m", "days", "days_m", "Hp", "ap", "D",
	]

	def _mid_time(year, month, day, mid):
		# Split the fractional hour of the interval centre into hours and minutes.
		hour = np.floor(mid)
		minute = 60 * (mid - hour)
		return "{0:04d}-{1:02d}-{2:02d} {3:02d}:{4:02d}".format(
			year, month, day, int(hour), int(minute)
		)

	records = np.genfromtxt(
		gfzhppath,
		delimiter=widths,
		dtype=field_types,
		names=columns,
	)
	# Drop the records whose year field is the -1 placeholder.
	records = records[records["year"] != -1]

	stamps = pd.to_datetime([
		_mid_time(year, month, day, mid)
		for year, month, day, mid in records[["year", "month", "day", "hh_m"]]
	])
	return pd.DataFrame(records, index=stamps)
329
330
331
def read_gfz_wdc(gfzpath):
	"""Parse space weather index data file in WDC format

	Parses the fixed-width GFZ index data in WDC format into a dataframe
	with one row per day, indexed by the observation date.
	Two-digit years below 32 are taken as 20xx, all others as 19xx.

	Parameters
	----------
	gfzpath: str
		File to parse, absolute path or relative to the current dir.

	Returns
	-------
	gfz_df: pandas.DataFrame
		The parsed space weather data (daily values).
		Raises an ``IOError`` if the file is not found.

		The dataframe contains the following columns:

		"year", "month", "day":
			The observation date, "year" is expanded to four digits.
		"bsrn":
			Bartels Solar Rotation Number.
			A sequence of 27-day intervals counted continuously from 1832 Feb 8.
		"rotd":
			Number of Day within the Bartels 27-day cycle (01-27).
		"Kp0", "Kp3", "Kp6", "Kp9", "Kp12", "Kp15", "Kp18", "Kp21":
			Planetary 3-hour Range Index (Kp) for 0000-0300, 0300-0600,
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
		"Kpsum": Sum of the 8 Kp indices for the day.
			Expressed to the nearest third of a unit.
		"Ap0", "Ap3", "Ap6", "Ap9", "Ap12", "Ap15", "Ap18", "Ap21":
			Planetary Equivalent Amplitude (Ap) for 0000-0300, 0300-0600,
			0600-0900, 0900-1200, 1200-1500, 1500-1800, 1800-2100, 2100-2400 UT
		"Apavg":
			Arithmetic average of the 8 Ap indices for the day.
		"Cp":
			Cp index - the daily planetary character figure, a qualitative
			estimate of the overall level of geomagnetic activity for this day
			determined from the sum of the eight ap amplitudes,
			ranging from 0.0 to 2.5 in steps of 0.1.
		"C9":
			The contracted scale for Cp with only 1 digit, from 0 to 9.
	"""
	_assert_file_exists(gfzpath)

	# Fixed column widths of a data line, in file order.
	widths = [
		# yy mm dd br db kp kp kp kp kp kp kp kp kps
		2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
		# ap ap ap ap ap ap ap ap Ap Cp C9
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1,
	]
	field_types = (
		"i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,i4,"
		"i4,i4,i4,i4,i4,i4,i4,i4,i4,f8,i4,"
	)
	kp_cols = ["Kp{0}".format(hour) for hour in range(0, 23, 3)] + ["Kpsum"]
	ap_cols = ["Ap{0}".format(hour) for hour in range(0, 23, 3)] + ["Apavg"]
	columns = (
		["year", "month", "day", "bsrn", "rotd"]
		+ kp_cols
		+ ap_cols
		+ ["Cp", "C9"]
	)

	records = np.genfromtxt(
		gfzpath,
		skip_header=3,
		delimiter=widths,
		dtype=field_types,
		names=columns,
	)
	# Drop the records whose year field is the -1 placeholder.
	records = records[records["year"] != -1]

	dates = pd.to_datetime([
		"{0:04d}-{1:02d}-{2:02d}".format(
			yy + (2000 if yy < 32 else 1900), mm, dd
		)
		for yy, mm, dd in records[["year", "month", "day"]]
	])
	gfz_df = pd.DataFrame(records, index=dates)
	# Replace the two-digit year by the full year of the index.
	gfz_df.loc[:, "year"] = dates.year
	# Adjust Kp to 0...9
	gfz_df[kp_cols] = 0.1 * gfz_df[kp_cols]
	return gfz_df
406
407
408
# Common arguments for the public daily and 3h interfaces.
# The text is substituted for the "{params}" placeholder in the docstrings
# of `gfz_daily()` and `gfz_3h()`, it has to match the `gfz_daily()` signature.
_GFZ_COMMON_PARAMS = """
Parameters
----------
gfzpath_all: str, optional, default depending on package install location
	Filename for the large combined index file including the
	historic data, absolute path or relative to the current dir.
gfzpath_30d: str, optional, default depending on package install location
	Filename for the 30-day (nowcast) index file,
	absolute path or relative to the current dir.
update: bool, optional, default False
	Attempt to update the local data if it is older than `update_interval`.
update_interval: str, optional, default "10days"
	The time after which the data are considered "old".
	By default, no automatic re-download is initiated, set `update` to true.
	The online data is updated every 3 hours, thus setting this value to
	a shorter time is not needed and not recommended.
gfz_format: str, optional, default `None`
	The file format to parse the files passed via `gfzpath_all` and `gfzpath_30d`.
	Use `None`, "default", "gfz", or "standard" for the "standard" GFZ ASCII files.
	Use "wdc" to parse files in WDC format into a full-length `pandas.DataFrame`.
	Use "hp30" or "hp60" to read the Hp30 and Hp60 data files.
"""
431
432
# Dispatch table for `gfz_daily()`: maps the lower-cased `gfz_format` name
# to a `(parse function, update function)` pair.
_PARSERS = {
	# Three aliases for the standard GFZ ASCII format.
	"default": (read_gfz, update_gfz),
	"gfz": (read_gfz, update_gfz),
	"standard": (read_gfz, update_gfz),
	# WDC format; updates use the standard GFZ updater.
	"wdc": (read_gfz_wdc, update_gfz),
	# Hp30 and Hp60 share one parser but download different files.
	"hp30": (read_gfz_hp, update_gfz_hp30),
	"hp60": (read_gfz_hp, update_gfz_hp60),
}
440
441
442
def _doc_param(**sub):
	"""Decorator factory substituting placeholders in a docstring

	Parameters
	----------
	**sub:
		Keyword arguments passed to `str.format()` on the docstring
		of the decorated object.

	Returns
	-------
	dec: callable
		Decorator that formats `obj.__doc__` in place and returns `obj`.
		Objects without a docstring are returned unchanged.
	"""
	def dec(obj):
		# `__doc__` is None when docstrings are stripped (e.g. `python -OO`),
		# formatting it would raise at import time.
		if obj.__doc__ is not None:
			obj.__doc__ = obj.__doc__.format(**sub)
		return obj
	return dec
447
448
449
@_doc_param(params=_GFZ_COMMON_PARAMS)
def gfz_daily(
	gfzpath_all=GFZ_PATH_ALL,
	gfzpath_30d=GFZ_PATH_30D,
	update=False,
	update_interval="10days",
	gfz_format=None,
):
	"""Combined daily Ap, Kp, and f10.7 index values

	Combines the "historic" and last-30-day data into one dataframe.
	Missing files are downloaded; outdated files are only downloaded
	again if `update` is true, otherwise a warning is issued.

	All arguments are optional and changing them from the defaults should
	neither be necessary nor is it recommended.
	{params}
	Returns
	-------
	gfz_df: pandas.DataFrame
		The combined parsed space weather data (daily values).
		Raises ``IOError`` if the data files cannot be found.
		Raises ``KeyError`` if `gfz_format` is not a known format name.

	See Also
	--------
	gfz_3h, read_gfz
	"""
	gfz_format = gfz_format or "gfz"
	parse_func, update_func = _PARSERS[gfz_format.lower()]
	# NOTE(review): the path defaults always point to the standard GFZ files,
	# callers selecting another format have to pass matching paths.

	# ensure that the file exists and is up to date
	if not (os.path.exists(gfzpath_all) and os.path.exists(gfzpath_30d)):
		warn("Could not find space weather data, trying to download.")
		update_func(gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d)

	if (
		get_gfz_age(gfzpath_all) > pd.Timedelta("30days")
		or get_gfz_age(gfzpath_30d) > pd.Timedelta(update_interval)
	):
		if update:
			update_func(gfzpath_all=gfzpath_all, gfzpath_30d=gfzpath_30d)
		else:
			warn(
				"Local data files are older than {0}, pass `update=True` or "
				"run `gfz.update_gfz()` manually if you need newer data.".format(
					update_interval
				)
			)

	df_all = parse_func(gfzpath_all)
	df_30d = parse_func(gfzpath_30d)
	# Append only the rows that are strictly newer than the last historic one.
	# Comparing the index (instead of slicing and dropping the first row)
	# keeps all rows when the files do not overlap.
	newer = df_30d[df_30d.index > df_all.index[-1]]
	return pd.concat([df_all, newer])
501
502
503
@_doc_param(params=_GFZ_COMMON_PARAMS)
def gfz_3h(*args, **kwargs):
	"""3h values of Ap and Kp

	Provides the 3-hourly Ap and Kp indices from the full daily data set.

	Accepts the same arguments as `gfz_daily()`.
	All arguments are optional and changing them from the defaults should
	neither be necessary nor is it recommended.
	{params}
	Returns
	-------
	gfz_df: pandas.DataFrame
		The combined Ap and Kp index data (3h values).
		The index values are centred at the 3h interval, i.e. at 01:30:00,
		04:30:00, 07:30:00, ... and so on.
		Raises ``IOError`` if the data files cannot be found.

	See Also
	--------
	gfz_daily
	"""
	daily_df = gfz_daily(*args, **kwargs)

	def _stack_3h(prefix):
		# Stack the eight 3h columns of one index into a single series,
		# each one shifted to the centre of its interval.
		parts = []
		for hour in range(0, 23, 3):
			# Work on an explicit copy, changing the index of `daily_df[...]`
			# directly relies on pandas returning the same cached object.
			part = daily_df["{0}{1}".format(prefix, hour)].copy()
			part.index = daily_df.index + pd.Timedelta(hour + 1.5, unit="h")
			parts.append(part)
		return pd.concat(parts)

	df = pd.DataFrame({"Ap": _stack_3h("Ap"), "Kp": _stack_3h("Kp")})
	return df.sort_index()
536