Passed
Push — master ( de2059...5c9f2d )
by Stefan
01:18
created

spaceweather.core.get_file_age()   A

Complexity

Conditions 4

Size

Total Lines 9
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 8
nop 2
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
# Copyright (c) 2020 Stefan Bender
2
#
3
# This module is part of pyspaceweather.
4
# pyspaceweather is free software: you can redistribute it or modify
5
# it under the terms of the GNU General Public License as published
6
# by the Free Software Foundation, version 2.
7
# See accompanying COPYING.GPLv2 file or http://www.gnu.org/licenses/gpl-2.0.html.
8
"""Python interface for space weather indices
9
10
"""
11
import os
12
from pkg_resources import resource_filename
13
import requests
14
import logging
15
from warnings import warn
16
17
import numpy as np
18
import pandas as pd
19
20
# Names exported by `from <module> import *`.
__all__ = [
	"sw_daily",
	"ap_kp_3h",
	"read_sw",
	"get_file_age",
	"update_data",
	"SW_PATH_ALL",
	"SW_PATH_5Y",
]

# Download addresses of the complete data set and of the last five years.
DL_URL_ALL = "https://celestrak.com/SpaceData/SW-All.txt"
DL_URL_5Y = "https://celestrak.com/SpaceData/SW-Last5Years.txt"

# Local file names are the last path component of the respective URL.
SW_FILE_ALL = os.path.split(DL_URL_ALL)[1]
SW_FILE_5Y = os.path.split(DL_URL_5Y)[1]

# Locations of the data files inside the package's "data" directory.
SW_PATH_ALL = resource_filename(__name__, os.path.join("data", SW_FILE_ALL))
SW_PATH_5Y = resource_filename(__name__, os.path.join("data", SW_FILE_5Y))
32
33
34
def _dl_file(swpath, url=DL_URL_ALL):
	"""Download `url` and store the response body in `swpath`

	Parameters
	----------
	swpath: str
		Name of the local file the downloaded data is written to.
	url: str, optional
		Address to download from, defaults to `DL_URL_ALL`.

	Raises
	------
	requests.HTTPError
		If the server answers with an error status (4xx or 5xx).
	"""
	with requests.get(url, stream=True) as r:
		# Check the status before opening `swpath` so that an HTTP error
		# page never overwrites an existing, valid data file.
		r.raise_for_status()
		with open(swpath, 'wb') as fd:
			for chunk in r.iter_content(chunk_size=1024):
				fd.write(chunk)
39
40
41
def get_file_age(swpath, relative=True):
	"""Age of the space weather data file

	Searches the file for the first line starting with "UPDATED" and
	parses the remainder of that line as a UTC time stamp.

	Parameters
	----------
	swpath: str
		Name of the data file to inspect.
	relative: bool, optional
		If true (the default), return the time elapsed since the update
		time stamp, otherwise return the time stamp itself.

	Returns
	-------
	pandas.Timedelta or pandas.Timestamp
		Elapsed time since the update if `relative` is true,
		the (UTC) update time stamp otherwise.

	Raises
	------
	ValueError
		If the file contains no line starting with "UPDATED".
	"""
	tag = "UPDATED"
	# The context manager closes the file even when the search stops early.
	with open(swpath) as fd:
		upd_line = next((ln for ln in fd if ln.startswith(tag)), None)
	if upd_line is None:
		raise ValueError(
			"no '{0}' line found in '{1}'".format(tag, swpath)
		)
	# Cut off the tag as a prefix; `str.lstrip()` would strip a character set.
	upd = pd.to_datetime(upd_line[len(tag):].strip(), utc=True)
	if relative:
		return pd.Timestamp.now(tz="UTC") - upd
	return upd
50
51
52
def update_data(
	min_age="3h",
	swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y,
	url_all=DL_URL_ALL, url_5y=DL_URL_5Y,
):
	"""Download the space weather data files if missing or outdated

	A file is downloaded when it does not exist, or when its age as
	reported by `get_file_age()` is not below the respective threshold.
	The full data file uses a fixed threshold of about four years,
	the 5-year file uses `min_age`.

	Parameters
	----------
	min_age: str, optional
		Minimum age of the 5-year file before it is downloaded again,
		in a format understood by `pandas.Timedelta`. Default: "3h".
	swpath_all: str, optional
		Local name of the full data file, defaults to `SW_PATH_ALL`.
	swpath_5y: str, optional
		Local name of the 5-year data file, defaults to `SW_PATH_5Y`.
	url_all: str, optional
		Download address of the full data file.
	url_5y: str, optional
		Download address of the 5-year data file.

	Returns
	-------
	None
	"""
	def _update_file(swpath, url, min_age):
		# Download `url` to `swpath` unless the file is newer than `min_age`.
		if not os.path.exists(swpath):
			logging.info("{0} not found, downloading.".format(swpath))
			_dl_file(swpath, url)
			return
		if get_file_age(swpath) < pd.Timedelta(min_age):
			logging.info("not updating '{0}'.".format(swpath))
			return
		logging.info("updating '{0}'.".format(swpath))
		_dl_file(swpath, url)

	# Update the large file after four years
	# to have some overlap with the 5-year data.
	# Given in days (4 * 365.25) because `pandas.Timedelta` rejects
	# the ambiguous year unit ("4y").
	_update_file(swpath_all, url_all, "1461days")
	# Don't re-download before `min_age` has passed (3h)
	_update_file(swpath_5y, url_5y, min_age)
73
74
75
def read_sw(swpath):
	"""Read a fixed-width space weather data file into a data frame

	Parameters
	----------
	swpath: str
		Name of the data file to read.

	Returns
	-------
	pandas.DataFrame
		One row per parsed record, indexed by the date built from the
		"year", "month", and "day" columns. The "Kp*" columns
		(including "Kpsum") are multiplied by 0.1.
	"""
	# Character widths of the columns, in file order:
	# yy mm dd br rd | 8 x kp, Kp sum | 8 x ap, Ap avg | cp c9 is f1 q | f2..f6
	field_widths = (
		[4, 3, 3, 5, 3]
		+ [3] * 8 + [4]
		+ [4] * 8 + [4]
		+ [4, 2, 4, 6, 2]
		+ [6] * 5
	)
	# 23 integer columns, then Cp (float), C9, isn, f10.7, Q, and 5 x f10.7.
	field_types = ",".join(
		["i4"] * 23 + ["f8", "i4", "i4", "f8", "i4"] + ["f8"] * 5
	)
	hours = range(0, 23, 3)
	kp_cols = ["Kp{0}".format(hh) for hh in hours]
	kp_cols.append("Kpsum")
	ap_cols = ["Ap{0}".format(hh) for hh in hours]
	ap_cols.append("Apavg")
	field_names = (
		["year", "month", "day", "bsrn", "rotd"]
		+ kp_cols
		+ ap_cols
		+ [
			"Cp", "C9", "isn", "f107_adj", "Q",
			"f107_81ctr_adj", "f107_81lst_adj",
			"f107_obs", "f107_81ctr_obs", "f107_81lst_obs",
		]
	)
	raw = np.genfromtxt(
		swpath,
		skip_header=3,
		delimiter=field_widths,
		dtype=field_types,
		names=field_names,
	)
	# Discard the first two and the last of the parsed rows.
	raw = raw[2:-1]
	# Keep only rows with a usable year; -1 presumably marks lines
	# that could not be parsed as data (TODO confirm).
	records = raw[raw["year"] != -1]
	dates = [
		"{0:04d}-{1:02d}-{2:02d}".format(yy, mm, dd)
		for yy, mm, dd in zip(records["year"], records["month"], records["day"])
	]
	frame = pd.DataFrame(records, index=pd.to_datetime(dates))
	frame[kp_cols] = 0.1 * frame[kp_cols]
	return frame
99
100
101
def sw_daily(swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, update_interval="30days"):
	"""Daily Ap, Kp, and f10.7 index values

	Combines the full data file with the 5-year data file. Missing files
	are downloaded first; files that look outdated only trigger a warning.

	Parameters
	----------
	swpath_all: str, optional
		Name of the full data file, defaults to `SW_PATH_ALL`.
	swpath_5y: str, optional
		Name of the 5-year data file, defaults to `SW_PATH_5Y`.
	update_interval: str, optional
		Age of the 5-year file above which a warning is issued,
		in a format understood by `pandas.Timedelta`. Default: "30days".

	Returns
	-------
	pandas.DataFrame
		The rows of the full file up to the first date of the 5-year
		file, followed by the 5-year data from its second row on.
	"""
	# ensure that the file exists and is up to date
	if (
		not os.path.exists(swpath_all)
		or not os.path.exists(swpath_5y)
	):
		warn("Could not find space weather data, trying to download.")
		# Pass the requested locations on, otherwise custom paths would
		# never be created and reading them below would fail.
		update_data(swpath_all=swpath_all, swpath_5y=swpath_5y)

	# Five years given in days (5 * 365.25, rounded) because
	# `pandas.Timedelta` rejects the ambiguous year unit ("5y").
	if (
		get_file_age(swpath_all) > pd.Timedelta("1826days")
		or get_file_age(swpath_5y) > pd.Timedelta(update_interval)
	):
		warn("Data files *might* be too old, consider running `sw.update_data()`.")

	df_all = read_sw(swpath_all)
	df_5y = read_sw(swpath_5y)
	# The label slice includes the first 5-year date, therefore the first
	# row of the 5-year data is skipped to avoid a duplicate.
	return pd.concat([df_all[:df_5y.index[0]], df_5y[1:]])
121
122
123
def ap_kp_3h(swpath_all=SW_PATH_ALL, swpath_5y=SW_PATH_5Y, update_interval="30days"):
	"""3h Ap and Kp index values

	Rearranges the eight 3-hourly "Ap*" and "Kp*" columns of the daily
	data into two time series. Each value is time-stamped at the centre
	of its 3-hour interval (01:30, 04:30, ..., 22:30).

	Parameters
	----------
	swpath_all: str, optional
		Name of the full data file, passed to `sw_daily()`.
	swpath_5y: str, optional
		Name of the 5-year data file, passed to `sw_daily()`.
	update_interval: str, optional
		Passed to `sw_daily()`.

	Returns
	-------
	pandas.DataFrame
		Data frame with the columns "Ap" and "Kp". The rows are ordered
		by interval first (all days of the 00-03 h interval, then all
		days of the 03-06 h interval, and so on), not chronologically.
	"""
	daily_df = sw_daily(
		swpath_all=swpath_all, swpath_5y=swpath_5y, update_interval=update_interval
	)
	ap_parts = []
	kp_parts = []
	for hh in range(0, 23, 3):
		# Build each shifted series explicitly. Assigning to the `.index`
		# of a column selected from the frame only changes a temporary
		# object and is lost when pandas returns a fresh Series per access.
		shifted = daily_df.index + pd.Timedelta(hh + 1.5, unit="h")
		ap_parts.append(
			pd.Series(daily_df["Ap{0}".format(hh)].values, index=shifted)
		)
		kp_parts.append(
			pd.Series(daily_df["Kp{0}".format(hh)].values, index=shifted)
		)
	sw_ap = pd.concat(ap_parts)
	sw_kp = pd.concat(kp_parts)
	return pd.DataFrame({"Ap": sw_ap, "Kp": sw_kp})
138