sciapy.level2.post_process.process_orbit()   (rated F)

Complexity

Conditions 15

Size

Total Lines 226
Code Lines 149

Duplication

Lines 21
Ratio 9.29 %

Code Coverage

Covered Statements 94 of 114 (82.46 %)
CRAP Score 16.214

Importance

Changes 0
Metric  Value
cc      15      (cyclomatic complexity)
eloc    149     (executable lines of code)
nop     5       (number of parameters)
dl      21      (duplicated lines)
loc     226     (total lines)
ccs     94      (covered statements)
cts     114     (total statements)
cp      0.8246  (coverage proportion)
crap    16.214  (CRAP score)
rs      2.0999
c       0
b       0
f       0
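
The CRAP score above is consistent with the usual definition, which combines the cyclomatic complexity comp(m) of a method with its covered-code fraction cov(m):

CRAP(m) = comp(m)^2 * (1 - cov(m))^3 + comp(m)
        = 15^2 * (1 - 0.8246)^3 + 15
        ≈ 1.214 + 15
        = 16.214

With 82.46 % coverage the complexity penalty stays small; the same method at 0 % coverage would score 225 + 15 = 240.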

How to fix

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method, sketched below.
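
For illustration, a minimal Extract Method sketch (hypothetical names, not code from sciapy): the comment labeling a block becomes the name of the new method.

# before: a comment labels one step of a long function
def process(values):
	# normalize the values
	total = sum(values)
	values = [v / total for v in values]
	return values

# after: the commented block is its own, named method
def process(values):
	return _normalize(values)

def _normalize(values):
	total = sum(values)
	return [v / total for v in values]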

Complexity

Complex code like sciapy.level2.post_process.process_orbit() often does a lot of different things. To break it down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
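
In process_orbit() below, for example, the space-weather index lookups form one such cohesive block (the variables share the msis_ and noem_ prefixes). A minimal sketch of extracting the MSIS part into a helper; the name _msis_indices is hypothetical, while _read_gm and the *_FILE constants appear in the module itself:

def _read_gm(fname):
	return dict(np.genfromtxt(fname, usecols=[0, 2], dtype=None))

def _msis_indices(dtdate):
	"""Look up the NRLMSIS-00 driver indices for one date (sketch)."""
	date = dtdate.strftime("%Y-%m-%d").encode()
	date1 = (dtdate - dt.timedelta(days=1)).strftime("%Y-%m-%d").encode()
	return (
		_read_gm(F107_FILE)[date1],  # f10.7 of the previous day
		_read_gm(F107A_FILE)[date],  # f10.7a of the day
		_read_gm(AP_FILE)[date],     # Ap of the day
	)

process_orbit() would then reduce three dictionary lookups to one call: msis_f107, msis_f107a, msis_ap = _msis_indices(msis_dtdate).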

#!/usr/bin/env python
# vim:fileencoding=utf-8
#
# Copyright (c) 2018 Stefan Bender
#
# This file is part of sciapy.
# sciapy is free software: you can redistribute it or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 2.
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
"""SCIAMACHY level 2 data post processing

Main script for SCIAMACHY orbital retrieval post processing
and data combining (to netcdf).
"""

from __future__ import absolute_import, division, print_function

import glob
import os
import argparse as ap
import datetime as dt
import logging
from pkg_resources import resource_filename

import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import interp1d
#import aacgmv2
#import apexpy

from astropy import units
from astropy.time import Time
from astropy.utils import iers
import astropy.coordinates as coord

import sciapy.level1c as sl
from .. import __version__
from . import scia_akm as sa
from .igrf import gmag_igrf
from .aacgm2005 import gmag_aacgm2005
try:
	from nrlmsise00 import msise_flat as msise
except ImportError:
	msise = None
try:
	from .noem import noem_cpp
except ImportError:
	noem_cpp = None

F107_FILE = resource_filename("sciapy", "data/indices/f107_noontime_flux_obs.txt")
F107A_FILE = resource_filename("sciapy", "data/indices/f107a_noontime_flux_obs.txt")
AP_FILE = resource_filename("sciapy", "data/indices/spidr_ap_2000-2012.dat")
F107_ADJ_FILE = resource_filename("sciapy", "data/indices/spidr_f107_2000-2012.dat")
KP_FILE = resource_filename("sciapy", "data/indices/spidr_kp_2000-2012.dat")

PHI_FAC = 11.91
LST_FAC = -0.62

iers.conf.auto_download = False
iers.conf.auto_max_age = None
# Use a mirror page for the astropy time data, see
# https://github.com/mzechmeister/serval/issues/33#issuecomment-551156361
# and
# https://github.com/astropy/astropy/issues/8981#issuecomment-523984247
iers.conf.iers_auto_url = "https://astroconda.org/aux/astropy_mirror/iers_a_1/finals2000A.all"


def solar_zenith_angle(alt, lat, lon, time):
	atime = Time(time)
	loc = coord.EarthLocation.from_geodetic(
		height=alt * units.km,
		lat=lat * units.deg,
		lon=lon * units.deg,
	)
	altaz = coord.AltAz(location=loc, obstime=atime)
	sun = coord.get_sun(atime)
	return sun.transform_to(altaz).zen.value


def read_spectra(year, orbit, spec_base=None, skip_upleg=True):
	"""Read and examine SCIAMACHY orbit spectra

	Reads the limb spectra and extracts the dates, times, latitudes,
	longitudes to be used to re-assess the retrieved geolocations.

	Parameters
	----------
	year: int
		The measurement year to select the corresponding subdir
		below `spec_base` (see below).
	orbit: int
		SCIAMACHY/Envisat orbit number of the spectra.
	spec_base: str, optional
		The root path to the level 1c spectra. Uses the current
		dir if not set or set to `None` (default).
	skip_upleg: bool, optional
		Skip upleg limb scans, i.e. night time scans. For NO retrievals,
		those are not used and should not be used here.
		Default: True

	Returns
	-------
	(dts, times, lats, lons, mlsts, alsts, eotcorr)
	"""
	fail = (None,) * 7
	if spec_base is None:
		spec_base = os.curdir
	spec_path = os.path.join(spec_base, "{0}".format(year))
	spec_path2 = os.path.join(spec_base, "{0}".format(int(year) + 1))
	logging.debug("spec_path: %s", spec_path)
	logging.debug("spec_path2: %s", spec_path2)
	if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)):
		return fail

	# the star stands for the (optional) date subdir
	# to find all spectra for the orbit
	spfiles = glob.glob(
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
			.format(spec_path, orbit))
	# sometimes the orbit ends up in the wrong year subdir,
	# so look in the subdir of the following year as well.
	spfiles += glob.glob(
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
			.format(spec_path2, orbit))
	if len(spfiles) < 2:
		return fail

	dts = []
	times = []
	lats = []
	lons = []
	mlsts = []
	alsts = []

	sls = sl.scia_limb_scan()
	for f in sorted(spfiles):
		sls.read_from_file(f)
		# copy the values from the l1c file
		lat, lon = sls.cent_lat_lon[:2]
		mlst, alst, eotcorr = sls.local_solar_time(False)
		tp_lats = sls.limb_data.tp_lat
		date = sls.date
		# debug output if requested
		logging.debug("file: %s", f)
		logging.debug("lat: %s, lon: %s", lat, lon)
		logging.debug("mlst: %s, alst: %s, eotcorr: %s", mlst, alst, eotcorr)
		logging.debug("tp_lats: %s", tp_lats)
		logging.debug("date: %s", date)
		if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5):
			# Exclude non-downleg measurements where the latitude
			# of the last real tangent point (the last one is dark sky)
			# is larger than or too close to the first latitude.
			# Requires an (empirical) separation of +0.5 degree.
			logging.debug("excluding upleg point at: %s, %s", lat, lon)
			continue
		dtdate = pd.to_datetime(dt.datetime(*date), utc=True)
		time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0
		logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr)
		dts.append(dtdate)
		times.append(time_hour)
		lats.append(lat)
		lons.append(lon)
		mlsts.append(mlst)
		alsts.append(alst)

	if len(lats) < 2:
		# interpolation will fail with less than 2 points
		return fail

	return (np.asarray(dts),
			np.asarray(times),
			np.asarray(lats),
			np.asarray(lons),
			np.asarray(mlsts),
			np.asarray(alsts), eotcorr)


def _get_orbit_ds(filename):
	# >= 1.5 (NO-v1.5)
	columns = [
		"id", "alt_max", "alt", "alt_min",
		"lat_max", "lat", "lat_min", "lons", "densities",
		"dens_err_meas", "dens_err_tot", "dens_tot", "apriori", "akdiag",
	]
	# peek at the first line to extract the number of columns
	with open(filename, 'rb') as _f:
		ncols = len(_f.readline().split())
	# reduce the columns depending on the retrieval version
	# default is >= 1.5 (NO-v1.5)
	if ncols < 16:  # < 1.5 (NO_emiss-183-gcaa9349)
		columns.remove("akdiag")
		if ncols < 15:  # < 1.0 (NO_emiss-178-g729efb0)
			columns.remove("apriori")
			if ncols < 14:  # initial output << v1.0
				columns.remove("lons")
	sdd_pd = pd.read_table(filename, header=None, names=columns, skiprows=1, sep='\s+')
	sdd_pd = sdd_pd.set_index("id")
	logging.debug("orbit ds: %s", sdd_pd.to_xarray())
	ind = pd.MultiIndex.from_arrays(
		[sdd_pd.lat, sdd_pd.alt],
		names=["lats", "alts"],
	)
	sdd_ds = xr.Dataset.from_dataframe(sdd_pd).assign(id=ind).unstack("id")
	logging.debug("orbit dataset: %s", sdd_ds)
	sdd_ds["lons"] = sdd_ds.lons.mean("alts")
	sdd_ds.load()
	logging.debug("orbit ds 2: %s", sdd_ds.stack(id=["lats", "alts"]).reset_index("id"))
	return sdd_ds


class _circ_interp(object):
	"""Interpolation on a circle

	Interpolates sin and cos separately and recombines them with
	`arctan2`, which avoids the discontinuity at the 0 / 2 pi
	(midnight, date line) wrap-around.
	"""
	def __init__(self, x, y, **kw):
		self.c_intpf = interp1d(x, np.cos(y), **kw)
		self.s_intpf = interp1d(x, np.sin(y), **kw)

	def __call__(self, x):
		return np.arctan2(self.s_intpf(x), self.c_intpf(x))


def process_orbit(
	orbit,
	ref_date="2000-01-01",
	dens_path=None,
	spec_base=None,
	use_msis=True,
):
	"""Post process retrieved SCIAMACHY orbit

	Parameters
	----------
	orbit: int
		SCIAMACHY/Envisat orbit number of the results to process.
	ref_date: str, optional
		Base date to calculate the relative days from,
		of the format "%Y-%m-%d". Default: 2000-01-01
	dens_path: str, optional
		The path to the level 2 data. Uses the current
		dir if not set or set to `None` (default).
	spec_base: str, optional
		The root path to the level 1c spectra. Uses the current
		dir if not set or set to `None` (default).
	use_msis: bool, optional
		Overwrite the total density and temperature with NRLMSIS-00
		values (both are also filled from MSIS when they are missing
		from the level 2 data). Default: True

	Returns
	-------
	(dts0, time0, lst0, lon0, sdd): tuple
		dts0 - days since ref_date at equator crossing (float)
		time0 - utc hour into the day at equator crossing (float)
		lst0 - apparent local solar time at the equator (float)
		lon0 - longitude of the equator crossing (float)
		sdd - `xarray.Dataset` containing the post-processed data
	"""
	def _read_gm(fname):
		return dict(np.genfromtxt(fname, usecols=[0, 2], dtype=None))

	fail = (None,) * 5
	logging.debug("processing orbit: %s", orbit)
	dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True)
	logging.debug("ref date: %s", dtrefdate)

	dfiles = glob.glob(
			"{0}/000NO_orbit_{1:05d}_*_Dichten.txt"
			.format(dens_path, orbit))
	if len(dfiles) < 1:
		return fail
	logging.debug("dfiles: %s", dfiles)
	logging.debug("splits: %s", [fn.split('/') for fn in dfiles])
	ddict = dict([
		(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4]))
		for fn in dfiles
	])
	logging.debug("ddict: %s", ddict)
	year = ddict[sorted(ddict.keys())[0]][-1][:4]
	logging.debug("year: %s", year)

	dts, times, lats, lons, mlsts, alsts, eotcorr = \
		read_spectra(year, orbit, spec_base)
	if dts is None:
		# return early if reading the spectra failed
		return fail

	dts = pd.to_datetime(dts, utc=True) - dtrefdate
	dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts])
	logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times)

	sdd = _get_orbit_ds(dfiles[0])
	logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons)

	# Re-interpolate the locations (longitudes) and times from the
	# limb scan spectra files along the orbit to determine the values
	# at the equator and to fill in possibly missing data.
	#
	# y values are unit circle angles in radians (0 < phi < 2 pi or -pi < phi < pi)
	# longitudes
	lons_intpf = _circ_interp(
		lats[::-1], np.radians(lons[::-1]),
		fill_value="extrapolate",
	)
	# apparent local solar time (EoT corrected)
	lst_intpf = _circ_interp(
		lats[::-1], np.pi / 12. * alsts[::-1],
		fill_value="extrapolate",
	)
	# mean local solar time
	mst_intpf = _circ_interp(
		lats[::-1], np.pi / 12. * mlsts[::-1],
		fill_value="extrapolate",
	)
	# utc time (day)
	time_intpf = _circ_interp(
		lats[::-1], np.pi / 12. * times[::-1],
		fill_value="extrapolate",
	)
	# datetime
	dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate")

	# equator values
	lon0 = np.degrees(lons_intpf(0.)) % 360.
	lst0 = (lst_intpf(0.) * 12. / np.pi) % 24.
	mst0 = (mst_intpf(0.) * 12. / np.pi) % 24.
	time0 = (time_intpf(0.) * 12. / np.pi) % 24.
	dts_retr_interp0 = dts_retr_interpf(0.)
	logging.debug("utc day at equator: %s", dts_retr_interp0)
	logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0)

	sdd["utc_hour"] = ("lats", (time_intpf(sdd.lats) * 12. / np.pi) % 24.)
	sdd["utc_days"] = ("lats", dts_retr_interpf(sdd.lats))

	if "lons" not in sdd.data_vars:
		# recalculate the longitudes
		# estimate the equatorial longitude from the
		# limb scan latitudes and longitudes
		lon0s_tp = lons - PHI_FAC * np.tan(np.radians(lats))
		clon0s_tp = np.cos(np.radians(lon0s_tp))
		slon0s_tp = np.sin(np.radians(lon0s_tp))
		lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1]))
		lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi))
		logging.info("lon0: %s", lon0)
		logging.info("lon0 tp: %s", lon0_tp)
		# interpolate to the retrieval latitudes
		tg_retr_lats = np.tan(np.radians(sdd.lats))
		calc_lons = (tg_retr_lats * PHI_FAC + lon0) % 360.
		calc_lons_tp = (tg_retr_lats * PHI_FAC + lon0_tp) % 360.
		sdd["lons"] = calc_lons_tp
		logging.debug("(calculated) retrieval lons: %s, %s",
				calc_lons, calc_lons_tp)
	else:
		# sdd.lons = sdd.lons % 360.
		logging.debug("(original) retrieval lons: %s", sdd.lons)

	sdd["mst"] = (sdd.utc_hour + sdd.lons / 15.) % 24.
	sdd["lst"] = sdd.mst + eotcorr / 60.
	mean_alt_km = sdd.alts.mean()

	dt_date_this = dt.timedelta(dts_retr_interp0.item()) + dtrefdate
	logging.info("date: %s", dt_date_this)

	gmlats, gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=mean_alt_km)
	# gmlats, gmlons = apexpy.Apex(dt_date_this).geo2qd(sdd.lats, sdd.lons, mean_alt_km)
	sdd["gm_lats"] = gmlats
	sdd["gm_lons"] = gmlons
	logging.debug("geomag. lats: %s, lons: %s", sdd.gm_lats, sdd.gm_lons)
	aacgmgmlats, aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons)
	# aacgmgmlats, aacgmgmlons = aacgmv2.convert(sdd.lats, sdd.lons, mean_alt_km, dt_date_this)
	sdd["aacgm_gm_lats"] = ("lats", aacgmgmlats)
	sdd["aacgm_gm_lons"] = ("lats", aacgmgmlons)
	logging.debug("aacgm geomag. lats: %s, lons: %s",
			sdd.aacgm_gm_lats, sdd.aacgm_gm_lons)

	# current day for MSIS input
	f107_data = _read_gm(F107_FILE)
	f107a_data = _read_gm(F107A_FILE)
	ap_data = _read_gm(AP_FILE)
	msis_dtdate = dt.timedelta(dts_retr_interp0.item()) + dtrefdate
	msis_dtdate1 = msis_dtdate - dt.timedelta(days=1)
	msis_date = msis_dtdate.strftime("%Y-%m-%d").encode()
	msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode()
	msis_f107 = f107_data[msis_date1]
	msis_f107a = f107a_data[msis_date]
	msis_ap = ap_data[msis_date]
	logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s",
			msis_date, msis_f107a, msis_f107, msis_ap)

	# previous day for NOEM input
	f107_adj = _read_gm(F107_ADJ_FILE)
	kp_data = _read_gm(KP_FILE)
	noem_dtdate = dt.timedelta(dts_retr_interp0.item() - 1) + dtrefdate
	noem_date = noem_dtdate.strftime("%Y-%m-%d").encode()
	noem_f107 = f107_adj[noem_date]
	noem_kp = kp_data[noem_date]
	logging.debug("NOEM date: %s, f10.7: %s, kp: %s",
			noem_date, noem_f107, noem_kp)

	for var in ["noem_no"]:
		if var not in sdd.data_vars:
			sdd[var] = xr.zeros_like(sdd.densities)
	if "sza" not in sdd.data_vars:
		sdd["sza"] = xr.zeros_like(sdd.lats)
	if "akdiag" not in sdd.data_vars:
		sdd["akdiag"] = xr.full_like(sdd.densities, np.nan)
		#akm_filename = glob.glob('{0}_orbit_{1:05d}_*_AKM*'.format(species, orb))[0]
		akm_filename = glob.glob(
				"{0}/000NO_orbit_{1:05d}_*_AKM*"
				.format(dens_path, orbit))[0]
		logging.debug("ak file: %s", akm_filename)
		ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat)
		logging.debug("ak data: %s", ak)
		#ak1a = ak.sum(axis = 3)
		#dak1a = np.diagonal(ak1a, axis1=0, axis2=2)
		sdd["akdiag"] = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1)

	if msise is not None:
		_msis_d_t = msise(
			msis_dtdate,
			sdd.alts.values[None, :],
			sdd.lats.values[:, None],
			sdd.lons.values[:, None] % 360.,
			msis_f107a, msis_f107, msis_ap,
			lst=sdd.lst.values[:, None],
		)
		if "temperature" not in sdd.data_vars or use_msis:
			sdd["temperature"] = xr.zeros_like(sdd.densities)
			sdd.temperature[:] = _msis_d_t[:, :, -1]
		if "dens_tot" not in sdd.data_vars or use_msis:
			sdd["dens_tot"] = xr.zeros_like(sdd.densities)
			sdd.dens_tot[:] = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2)
	for i, (lat, lon) in enumerate(
			zip(sdd.lats.values, sdd.lons.values)):
		if noem_cpp is not None:
			sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts,
					[lat], [lon], noem_f107, noem_kp)[:]
		else:
			sdd.noem_no[i][:] = np.nan
	sdd.sza[:] = solar_zenith_angle(
			mean_alt_km,
			sdd.lats, sdd.lons,
			(pd.to_timedelta(sdd.utc_days.values, unit="days") + dtrefdate).to_pydatetime(),
	)
	sdd["vmr"] = sdd.densities / sdd.dens_tot * 1.e9  # ppb
	# drop unused variables
	sdd = sdd.drop(["alt_min", "alt", "alt_max", "lat_min", "lat", "lat_max"])
	# time and orbit
	sdd = sdd.expand_dims("time")
	sdd["time"] = ("time", [dts_retr_interp0])
	sdd["orbit"] = ("time", [orbit])
	return dts_retr_interp0, time0, lst0, lon0, sdd

def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
452
	"""Find SCIAMACHY orbits with retrieved data at a date
453
454
	Parameters
455
	----------
456
	date: str
457
		The date in the format "%Y-%m-%d".
458
	mlt: bool, optional
459
		Look for MLT mode data instead of nominal mode data.
460
		Increases the heuristics to find all MLT orbits.
461
	path: str, optional
462
		The path to the level 2 data. If `None` tries to infer
463
		the data directory from the L2 version using
464
		`./*<L2_version>`. Default: None
465
466
	Returns
467
	-------
468
	orbits: list
469
		List of found orbits with retrieved data files
470
	"""
471 1
	logging.debug("pre-processing: %s", date)
472 1
	if path is None:
473
		density_base = os.curdir
474
		path = "{0}/*{1}".format(density_base, L2_version)
475
		logging.debug("path: %s", path)
476
477 1
	dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format(
478
				path, date.replace("-", "")))
479 1
	orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles])
480 1
	if mlt:
481 1
		orbits.append(orbits[-1] + 1)
482 1
	return orbits
483
484

def combine_orbit_data(orbits,
		ref_date="2000-01-01",
		L2_version="v6.2",
		dens_path=None, spec_base=None,
		save_nc=False):
	"""Combine post-processed SCIAMACHY retrieved orbit data

	Parameters
	----------
	orbits: list
		List of SCIAMACHY/Envisat orbit numbers to process.
	ref_date: str, optional
		Base date to calculate the relative days from,
		of the format "%Y-%m-%d". Default: 2000-01-01
	L2_version: str, optional
		SCIAMACHY level 2 data version to process
	dens_path: str, optional
		The path to the level 2 data. If `None` tries to infer
		the data directory from the L2 version looking for anything
		in the current directory that ends in <L2_version>: `./*<L2_version>`.
		Default: None
	spec_base: str, optional
		The root path to the level 1c spectra. Uses the current
		dir if not set or set to `None` (default).
	save_nc: bool, optional
		Save the intermediate orbit data sets to netcdf files
		for debugging.

	Returns
	-------
	sdday_ds: `xarray.Dataset` or None
		The post-processed orbit data sets concatenated along the
		`time` dimension, or `None` if no orbit data was processed.
	"""
	if dens_path is None:
		# try some heuristics
		density_base = os.curdir
		dens_path = "{0}/*{1}".format(density_base, L2_version)

	sddayl = []
	for orbit in sorted(orbits):
		dateo, timeo, lsto, lono, sdens = process_orbit(orbit,
				ref_date=ref_date, dens_path=dens_path, spec_base=spec_base)
		logging.info(
			"orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s",
			orbit, dateo, timeo, lsto, lono
		)
		if sdens is not None:
			sddayl.append(sdens)
			if save_nc:
				# relies on a `filename` attribute being set on the data set
				sdens.to_netcdf(sdens.filename[:-3] + "nc")
	if not sddayl:
		return None
	return xr.concat(sddayl, dim="time")


VAR_ATTRS = {
	"2.1": {
		"MSIS_Dens": dict(
			units='cm^{-3}',
			long_name='total number density (NRLMSIS-00)',
		),
		"MSIS_Temp": dict(
			units='K',
			long_name='temperature',
			model="NRLMSIS-00",
		),
	},
	"2.2": {
	},
	"2.3": {
		"aacgm_gm_lats": dict(
			long_name='geomagnetic_latitude',
			model='AACGM2005 at 80 km',
			units='degrees_north',
		),
		"aacgm_gm_lons": dict(
			long_name='geomagnetic_longitude',
			model='AACGM2005 at 80 km',
			units='degrees_east',
		),
		"orbit": dict(
			axis='T', calendar='standard',
			long_name='SCIAMACHY/Envisat orbit number',
			standard_name="orbit",
			units='1',
		),
	},
}
VAR_RENAME = {
	"2.1": {
		# Rename to v2.1 variable names
		"MSIS_Dens": "TOT_DENS",
		"MSIS_Temp": "temperature",
	},
	"2.2": {
	},
	"2.3": {
	},
}
FLOAT_VARS = [
	"altitude", "latitude", "longitude",
	"app_LST", "mean_LST", "mean_SZA",
	"aacgm_gm_lats", "aacgm_gm_lons",
	"gm_lats", "gm_lons",
]


def sddata_set_attrs(
	sdday_ds,
	file_version="2.2",
	ref_date="2000-01-01",
	rename=True,
	species="NO",
):
	"""Customize xarray Dataset variables and attributes

	Changes the variable names to match those exported from the
	`scia_density_day` class.

	Parameters
	----------
	sdday_ds: `xarray.Dataset` instance
		The combined dataset.
	file_version: string "major.minor", optional
		The netcdf file dataset version; determines some variable
		names and attributes.
	ref_date: str, optional
		Base date to calculate the relative days from,
		of the format "%Y-%m-%d". Default: 2000-01-01
	rename: bool, optional
		Rename the dataset variables to match the
		`scia_density_day` exported ones.
		Default: True
	species: str, optional
		The name of the level 2 species, used to prefix the
		dataset variables to be named <species>_<variable>.
		Default: "NO".
	"""
	if rename:
		sdday_ds = sdday_ds.rename({
			# 2d vars
			"akdiag": "{0}_AKDIAG".format(species),
			"apriori": "{0}_APRIORI".format(species),
			"densities": "{0}_DENS".format(species),
			"dens_err_meas": "{0}_ERR".format(species),
			"dens_err_tot": "{0}_ETOT".format(species),
			"dens_tot": "MSIS_Dens",
			"noem_no": "{0}_NOEM".format(species),
			"temperature": "MSIS_Temp",
			"vmr": "{0}_VMR".format(species),
			# 1d vars and dimensions
			"alts": "altitude",
			"lats": "latitude",
			"lons": "longitude",
			"lst": "app_LST",
			"mst": "mean_LST",
			"sza": "mean_SZA",
			"utc_hour": "UTC",
		})
	# relative standard deviation
	sdday_ds["{0}_RSTD".format(species)] = 100.0 * np.abs(
			sdday_ds["{0}_ERR".format(species)] / sdday_ds["{0}_DENS".format(species)])
	# fix coordinate attributes
	sdday_ds["time"].attrs = dict(axis='T', standard_name='time',
		calendar='standard', long_name='equatorial crossing time',
		units="days since {0}".format(
			pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")))
	sdday_ds["altitude"].attrs = dict(axis='Z', positive='up',
		long_name='altitude', standard_name='altitude', units='km')
	sdday_ds["latitude"].attrs = dict(axis='Y', long_name='latitude',
		standard_name='latitude', units='degrees_north')
	# Default variable attributes
	sdday_ds["{0}_DENS".format(species)].attrs = {
			"units": "cm^{-3}",
			"long_name": "{0} number density".format(species)}
	sdday_ds["{0}_ERR".format(species)].attrs = {
			"units": "cm^{-3}",
			"long_name": "{0} density measurement error".format(species)}
	sdday_ds["{0}_ETOT".format(species)].attrs = {
			"units": "cm^{-3}",
			"long_name": "{0} density total error".format(species)}
	sdday_ds["{0}_RSTD".format(species)].attrs = dict(
			units='%',
			long_name='{0} relative standard deviation'.format(species))
	sdday_ds["{0}_AKDIAG".format(species)].attrs = dict(
			units='1',
			long_name='{0} averaging kernel diagonal element'.format(species))
	sdday_ds["{0}_APRIORI".format(species)].attrs = dict(
			units='cm^{-3}', long_name='{0} apriori density'.format(species))
	sdday_ds["{0}_NOEM".format(species)].attrs = dict(
			units='cm^{-3}', long_name='NOEM {0} number density'.format(species))
	sdday_ds["{0}_VMR".format(species)].attrs = dict(
			units='ppb', long_name='{0} volume mixing ratio'.format(species))
	sdday_ds["MSIS_Dens"].attrs = dict(units='cm^{-3}',
			long_name='MSIS total number density',
			model="NRLMSIS-00")
	sdday_ds["MSIS_Temp"].attrs = dict(units='K',
			long_name='MSIS temperature',
			model="NRLMSIS-00")
	sdday_ds["longitude"].attrs = dict(long_name='longitude',
			standard_name='longitude', units='degrees_east')
	sdday_ds["app_LST"].attrs = dict(units='hours',
			long_name='apparent local solar time')
	sdday_ds["mean_LST"].attrs = dict(units='hours',
			long_name='mean local solar time')
	sdday_ds["mean_SZA"].attrs = dict(units='degrees',
			long_name='solar zenith angle at mean altitude')
	sdday_ds["UTC"].attrs = dict(units='hours',
			long_name='measurement utc time')
	sdday_ds["utc_days"].attrs = dict(
			units='days since {0}'.format(
				pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")),
			long_name='measurement utc day')
	sdday_ds["gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
			model='IGRF', units='degrees_north')
	sdday_ds["gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
			model='IGRF', units='degrees_east')
	sdday_ds["aacgm_gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
			# model='AACGM2005 80 km',  # v2.3
			model='AACGM',  # v2.1, v2.2
			units='degrees_north')
	sdday_ds["aacgm_gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
			# model='AACGM2005 80 km',  # v2.3
			model='AACGM',  # v2.1, v2.2
			units='degrees_east')
	sdday_ds["orbit"].attrs = dict(
			axis='T', calendar='standard',
			# long_name='SCIAMACHY/Envisat orbit number',  # v2.3
			long_name='orbit',  # v2.1, v2.2
			standard_name="orbit",
			# units='1',  # v2.3
			units='orbit number',  # v2.1, v2.2
	)
	# Overwrite version-specific variable attributes
	for _v, _a in VAR_ATTRS[file_version].items():
		sdday_ds[_v].attrs = _a
	if rename:
		# version specific renaming
		sdday_ds = sdday_ds.rename(VAR_RENAME[file_version])
	if int(file_version.split(".")[0]) < 3:
		# invert latitudes for backwards-compatibility
		sdday_ds = sdday_ds.sortby("latitude", ascending=False)
	else:
		sdday_ds = sdday_ds.sortby("latitude", ascending=True)

	# for var in FLOAT_VARS:
	# 	_attrs = sdday_ds[var].attrs
	# 	sdday_ds[var] = sdday_ds[var].astype('float32')
	# 	sdday_ds[var].attrs = _attrs

	dateo = pd.to_datetime(
			xr.conventions.decode_cf(sdday_ds[["time"]]).time.data[0],
			utc=True,
	).strftime("%Y-%m-%d")
	logging.debug("date %s dataset: %s", dateo, sdday_ds)
	return sdday_ds


def main():
	"""SCIAMACHY level 2 post processing
	"""
	logging.basicConfig(level=logging.WARNING,
			format="[%(levelname)-8s] (%(asctime)s) "
			"%(filename)s:%(lineno)d %(message)s",
			datefmt="%Y-%m-%d %H:%M:%S %z")

	parser = ap.ArgumentParser()
	parser.add_argument("file", default="SCIA_NO.nc",
			help="the filename of the output netcdf file")
	parser.add_argument("-M", "--month", metavar="YEAR-MM",
			help="infer start and end dates for month")
	parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE",
			help="colon-separated start and end dates")
	parser.add_argument("-d", "--dates", help="comma-separated list of dates")
	parser.add_argument("-B", "--base_date",
			metavar="YEAR-MM-DD", default="2000-01-01",
			help="Reference date to base the time values (days) on "
			"(default: %(default)s).")
	parser.add_argument("-f", "--orbit_file",
			help="the file containing the input orbits")
	parser.add_argument("-r", "--retrieval_version", default="v6.2",
			help="SCIAMACHY level 2 data version to process")
	parser.add_argument("-R", "--file_version", default="2.2",
			help="Postprocessing format version of the output file")
	parser.add_argument("-A", "--author", default="unknown",
			help="Author of the post-processed data set "
			"(default: %(default)s)")
	parser.add_argument("-p", "--path", default=None,
			help="path containing the L2 data")
	parser.add_argument("-s", "--spectra", default=None, metavar="PATH",
			help="path containing the L1c spectra")
	parser.add_argument("-m", "--mlt", action="store_true", default=False,
			help="indicate nominal (False, default) or MLT data (True)")
	parser.add_argument("-X", "--xarray", action="store_true", default=False,
			help="DEPRECATED, kept for compatibility reasons, does nothing.")
	loglevels = parser.add_mutually_exclusive_group()
	loglevels.add_argument("-l", "--loglevel", default="WARNING",
			choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
			help="change the loglevel (default: 'WARNING')")
	loglevels.add_argument("-q", "--quiet", action="store_true", default=False,
			help="less output, same as --loglevel=ERROR (default: False)")
	loglevels.add_argument("-v", "--verbose", action="store_true", default=False,
			help="verbose output, same as --loglevel=INFO (default: False)")
	args = parser.parse_args()
	if args.quiet:
		logging.getLogger().setLevel(logging.ERROR)
	elif args.verbose:
		logging.getLogger().setLevel(logging.INFO)
	else:
		logging.getLogger().setLevel(args.loglevel)

	logging.info("processing L2 version: %s", args.retrieval_version)
	logging.info("writing data file version: %s", args.file_version)

	pddrange = []
	if args.month is not None:
		d0 = pd.to_datetime(args.month + "-01", utc=True)
		pddrange.extend(pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd()))
	if args.date_range is not None:
		pddrange.extend(pd.date_range(*args.date_range.split(':')))
	if args.dates is not None:
		pddrange.extend(pd.to_datetime(args.dates.split(','), utc=True))
	logging.debug("pddrange: %s", pddrange)

	olist = []
	for date in pddrange:
		try:
			olist += get_orbits_from_date(date.strftime("%Y-%m-%d"),
				mlt=args.mlt, path=args.path, L2_version=args.retrieval_version)
		except:  # handle NaT
			pass
	if args.orbit_file is not None:
		olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist()
	logging.debug("olist: %s", olist)

	if not olist:
		logging.warning("No orbits to process.")
		return

	sd_xr = combine_orbit_data(olist,
			ref_date=args.base_date,
			L2_version=args.retrieval_version,
			dens_path=args.path, spec_base=args.spectra, save_nc=False)

	if sd_xr is None:
		logging.warning("Processed data is empty.")
		return

	sd_xr = sddata_set_attrs(sd_xr, ref_date=args.base_date, file_version=args.file_version)
	sd_xr = sd_xr[sorted(sd_xr.variables)]
	sd_xr.attrs["author"] = args.author
	sd_xr.attrs["creation_time"] = dt.datetime.utcnow().strftime(
			"%a %b %d %Y %H:%M:%S +00:00 (UTC)")
	sd_xr.attrs["software"] = "sciapy {0}".format(__version__)
	sd_xr.attrs["L2_data_version"] = args.retrieval_version
	sd_xr.attrs["version"] = args.file_version
	logging.debug(sd_xr)
	sd_xr.to_netcdf(args.file, unlimited_dims=["time"])


if __name__ == "__main__":
	main()
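
As a usage sketch (the data paths here are illustrative, not shipped with sciapy), the same pipeline can also be driven from Python instead of the command line, using only the functions defined above:

from sciapy.level2.post_process import (
	get_orbits_from_date, combine_orbit_data, sddata_set_attrs)

# orbits with retrieved data on one day, assuming level 2 data in ./l2_v6.2
orbits = get_orbits_from_date("2007-01-15", path="./l2_v6.2")
# post process and concatenate the orbits, assuming l1c spectra in ./l1c
ds = combine_orbit_data(orbits, dens_path="./l2_v6.2", spec_base="./l1c")
if ds is not None:
	ds = sddata_set_attrs(ds, file_version="2.2", ref_date="2000-01-01")
	ds.to_netcdf("SCIA_NO_20070115.nc", unlimited_dims=["time"])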