Passed
Push — master ( 72d69e...f33ca4 )
by Stefan
05:42
created

sciapy.level2.post_process.read_spectra()   C

Complexity

Conditions 9

Size

Total Lines 96
Code Lines 56

Duplication

Lines 96
Ratio 100 %

Importance

Changes 0
Metric Value
cc 9
eloc 56
nop 4
dl 96
loc 96
rs 6.1066
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
#!/usr/bin/env python
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2018 Stefan Bender
5
#
6
# This file is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify it
8
# under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11
"""SCIAMACHY level 2 data post processing
12
13
Main script for SCIAMACHY orbital retrieval post processing
14
and data combining (to netcdf).
15
"""
16
17
from __future__ import absolute_import, division, print_function
18
19
import glob
20
import os
21
import argparse as ap
22
import datetime as dt
23
import logging
24
from pkg_resources import resource_filename
25
26
import numpy as np
27
import pandas as pd
28
import xarray as xr
29
from scipy.interpolate import interp1d
30
#import aacgmv2
31
#import apexpy
32
33
from astropy import units
34
from astropy.time import Time
35
import astropy.coordinates as coord
36
37
import sciapy.level1c as sl
38
from . import scia_akm as sa
39
from .igrf import gmag_igrf
40
from .aacgm2005 import gmag_aacgm2005
41
try:
42
	from nrlmsise00 import msise_flat as msise
43
except ImportError:
44
	msise = None
45
try:
46
	from .noem import noem_cpp
47
except ImportError:
48
	noem_cpp = None
49
50
F107_FILE = resource_filename("sciapy", "data/indices/f107_noontime_flux_obs.txt")
51
F107A_FILE = resource_filename("sciapy", "data/indices/f107a_noontime_flux_obs.txt")
52
AP_FILE = resource_filename("sciapy", "data/indices/spidr_ap_2000-2012.dat")
53
F107_ADJ_FILE = resource_filename("sciapy", "data/indices/spidr_f107_2000-2012.dat")
54
KP_FILE = resource_filename("sciapy", "data/indices/spidr_kp_2000-2012.dat")
55
56
PHI_FAC = 11.91
57
LST_FAC = -0.62
58
59
60 View Code Duplication
def solar_zenith_angle(alt, lat, lon, time):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
61
	atime = Time(time)
62
	loc = coord.EarthLocation.from_geodetic(
63
		height=alt * units.km,
64
		lat=lat * units.deg,
65
		lon=lon * units.deg,
66
	)
67
	altaz = coord.AltAz(location=loc, obstime=atime)
68
	sun = coord.get_sun(atime)
69
	return sun.transform_to(altaz).zen.value
70
71
72 View Code Duplication
def read_spectra(year, orbit, spec_base=None, skip_upleg=True):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
73
	"""Read and examine SCIAMACHY orbit spectra
74
75
	Reads the limb spactra and extracts the dates, times, latitudes,
76
	longitudes to be used to re-assess the retrieved geolocations.
77
78
	Parameters
79
	----------
80
	year: int
81
		The measurement year to select the corresponding subdir
82
		below `spec_base` (see below).
83
	orbit: int
84
		SCIAMACHY/Envisat orbit number of the spectra.
85
	spec_base: str, optional
86
		The root path to the level 1c spectra. Uses the current
87
		dir if not set or set to `None` (default).
88
	skip_upleg: bool, optional
89
		Skip upleg limb scans, i.e. night time scans. For NO retrievals,
90
		those are not used and should be not used here.
91
		Default: True
92
93
	Returns
94
	-------
95
	(dts, times, lats, lons, mlsts, alsts, eotcorr)
96
	"""
97
	fail = (None,) * 7
98
	if spec_base is None:
99
		spec_base = os.curdir
100
	spec_path = os.path.join(spec_base, "{0}".format(year))
101
	spec_path2 = os.path.join(spec_base, "{0}".format(int(year) + 1))
102
	logging.debug("spec_path: %s", spec_path)
103
	logging.debug("spec_path2: %s", spec_path)
104
	if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)):
105
		return fail
106
107
	# the star stands for the (optional) date subdir
108
	# to find all spectra for the orbit
109
	spfiles = glob.glob(
110
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
111
			.format(spec_path, orbit))
112
	# sometimes for whatever reason the orbit ends up in the wrong year subdir
113
	# looks in the subdir for the following year as well.
114
	spfiles += glob.glob(
115
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
116
			.format(spec_path2, orbit))
117
	if len(spfiles) < 2:
118
		return fail
119
120
	dts = []
121
	times = []
122
	lats = []
123
	lons = []
124
	mlsts = []
125
	alsts = []
126
127
	sls = sl.scia_limb_scan()
128
	for f in sorted(spfiles):
129
		sls.read_from_file(f)
130
		# copy the values from the l1c file
131
		lat, lon = sls.cent_lat_lon[:2]
132
		mlst, alst, eotcorr = sls.local_solar_time(False)
133
		tp_lats = sls.limb_data.tp_lat
134
		date = sls.date
135
		# debug output if requested
136
		logging.debug("file: %s", f)
137
		logging.debug("lat: %s, lon: %s", lat, lon)
138
		logging.debug("mlst: %s, alst: %s, eotcorr: %s", mlst, alst, eotcorr)
139
		logging.debug("tp_lats: %s", tp_lats)
140
		logging.debug("date: %s", date)
141
		if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5):
142
			# Exclude non-downleg measurements where the latitude
143
			# of the last real tangent point (the last is dark sky)
144
			# is larger than or too close to the first latitude.
145
			# Requires an (empirical) separation of +0.5 degree.
146
			logging.debug("excluding upleg point at: %s, %s", lat, lon)
147
			continue
148
		dtdate = pd.to_datetime(dt.datetime(*date), utc=True)
149
		time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0
150
		logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr)
151
		dts.append(dtdate)
152
		times.append(time_hour)
153
		lats.append(lat)
154
		lons.append(lon)
155
		mlsts.append(mlst)
156
		alsts.append(alst)
157
158
	if len(lats) < 2:
159
		# interpolation will fail with less than 2 points
160
		return fail
161
162
	return (np.asarray(dts),
163
			np.asarray(times),
164
			np.asarray(lats),
165
			np.asarray(lons),
166
			np.asarray(mlsts),
167
			np.asarray(alsts), eotcorr)
0 ignored issues
show
introduced by
The variable eotcorr does not seem to be defined in case the for loop on line 128 is not entered. Are you sure this can never be the case?
Loading history...
168
169
170 View Code Duplication
def _get_orbit_ds(filename):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
171
	# >= 1.5 (NO-v1.5)
172
	columns = [
173
		"id", "alt_max", "alt", "alt_min",
174
		"lat_max", "lat", "lat_min", "lons", "densities",
175
		"dens_err_meas", "dens_err_tot", "dens_tot", "apriori", "akdiag",
176
	]
177
	# peek at the first line to extract the number of columns
178
	with open(filename, 'rb') as _f:
179
		ncols = len(_f.readline().split())
180
	# reduce the columns depending on the retrieval version
181
	# default is >= 1.5 (NO-v1.5)
182
	if ncols < 16:  # < 1.5 (NO_emiss-183-gcaa9349)
183
		columns.remove("akdiag")
184
		if ncols < 15:  # < 1.0 (NO_emiss-178-g729efb0)
185
			columns.remove("apriori")
186
			if ncols < 14:  # initial output << v1.0
187
				columns.remove("lons")
188
	sdd_pd = pd.read_table(filename, header=None, names=columns, skiprows=1, sep='\s+')
189
	sdd_pd = sdd_pd.set_index("id")
190
	logging.debug("orbit ds: %s", sdd_pd.to_xarray())
191
	ind = pd.MultiIndex.from_arrays(
192
		[sdd_pd.lat, sdd_pd.alt],
193
		names=["lats", "alts"],
194
	)
195
	sdd_ds = xr.Dataset.from_dataframe(sdd_pd).assign(id=ind).unstack("id")
196
	logging.debug("orbit dataset: %s", sdd_ds)
197
	sdd_ds["lons"] = sdd_ds.lons.mean("alts")
198
	sdd_ds.load()
199
	logging.debug("orbit ds 2: %s", sdd_ds.stack(id=["lats", "alts"]).reset_index("id"))
200
	return sdd_ds
201
202
203
class _circ_interp(object):
204
	"""Interpolation on a circle"""
205
	def __init__(self, x, y, **kw):
206
		self.c_intpf = interp1d(x, np.cos(y), **kw)
207
		self.s_intpf = interp1d(x, np.sin(y), **kw)
208
209
	def __call__(self, x):
210
		return np.arctan2(self.s_intpf(x), self.c_intpf(x))
211
212
213 View Code Duplication
def process_orbit(
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
214
	orbit,
215
	ref_date="2000-01-01",
216
	dens_path=None,
217
	spec_base=None,
218
	use_msis=True,
219
):
220
	"""Post process retrieved SCIAMACHY orbit
221
222
	Parameters
223
	----------
224
	orbit: int
225
		SCIAMACHY/Envisat orbit number of the results to process.
226
	ref_date: str, optional
227
		Base date to calculate the relative days from,
228
		of the format "%Y-%m-%d". Default: 2000-01-01
229
	dens_path: str, optional
230
		The path to the level 2 data. Uses the current
231
		dir if not set or set to `None` (default).
232
	spec_base: str, optional
233
		The root path to the level 1c spectra. Uses the current
234
		dir if not set or set to `None` (default).
235
236
	Returns
237
	-------
238
	(dts0, time0, lst0, lon0, sdd): tuple
239
		dts0 - days since ref_date at equator crossing (float)
240
		time0 - utc hour into the day at equator crossing (float)
241
		lst0 - apparent local solar time at the equator (float)
242
		lon0 - longitude of the equator crossing (float)
243
		sdd - `scia_density_pp` instance of the post-processed data
244
	"""
245
	def _read_gm(fname):
246
		return dict(np.genfromtxt(fname, usecols=[0, 2], dtype=None))
247
248
	fail = (None,) * 5
249
	logging.debug("processing orbit: %s", orbit)
250
	dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True)
251
	logging.debug("ref date: %s", dtrefdate)
252
253
	dfiles = glob.glob(
254
			"{0}/000NO_orbit_{1:05d}_*_Dichten.txt"
255
			.format(dens_path, orbit))
256
	if len(dfiles) < 1:
257
		return fail
258
	logging.debug("dfiles: %s", dfiles)
259
	logging.debug("splits: %s", [fn.split('/') for fn in dfiles])
260
	ddict = dict([
261
		(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4]))
262
		for fn in dfiles
263
	])
264
	logging.debug("ddict: %s", ddict)
265
	year = ddict[sorted(ddict.keys())[0]][-1][:4]
266
	logging.debug("year: %s", year)
267
268
	dts, times, lats, lons, mlsts, alsts, eotcorr = \
269
		read_spectra(year, orbit, spec_base)
270
	if dts is None:
271
		# return early if reading the spectra failed
272
		return fail
273
274
	dts = pd.to_datetime(dts, utc=True) - dtrefdate
275
	dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts])
276
	logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times)
277
278
	sdd = _get_orbit_ds(dfiles[0])
279
	logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons)
280
281
	# Re-interpolates the location (longitude) and times from the
282
	# limb scan spectra files along the orbit to determine the values
283
	# at the Equator and to fill in possibly missing data.
284
	#
285
	# y values are unit circle angles in radians (0 < φ < 2π or -π < φ < π)
286
	# longitudes
287
	lons_intpf = _circ_interp(
288
		lats[::-1], np.radians(lons[::-1]),
289
		fill_value="extrapolate",
290
	)
291
	# apparent local solar time (EoT corrected)
292
	lst_intpf = _circ_interp(
293
		lats[::-1], np.pi / 12. * alsts[::-1],
294
		fill_value="extrapolate",
295
	)
296
	# mean local solar time
297
	mst_intpf = _circ_interp(
298
		lats[::-1], np.pi / 12. * mlsts[::-1],
299
		fill_value="extrapolate",
300
	)
301
	# utc time (day)
302
	time_intpf = _circ_interp(
303
		lats[::-1], np.pi / 12. * times[::-1],
304
		fill_value="extrapolate",
305
	)
306
	# datetime
307
	dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate")
308
309
	# equator values
310
	lon0 = np.degrees(lons_intpf(0.)) % 360.
311
	lst0 = (lst_intpf(0.) * 12. / np.pi) % 24.
312
	mst0 = (mst_intpf(0.) * 12. / np.pi) % 24.
313
	time0 = (time_intpf(0.) * 12. / np.pi) % 24.
314
	dts_retr_interp0 = dts_retr_interpf(0.)
315
	logging.debug("utc day at equator: %s", dts_retr_interp0)
316
	logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0)
317
318
	sdd["utc_hour"] = ("lats", (time_intpf(sdd.lats) * 12. / np.pi) % 24.)
319
	sdd["utc_days"] = ("lats", dts_retr_interpf(sdd.lats))
320
321
	if "lons" not in sdd.data_vars:
322
		# recalculate the longitudes
323
		# estimate the equatorial longitude from the
324
		# limb scan latitudes and longitudes
325
		lon0s_tp = lons - PHI_FAC * np.tan(np.radians(lats))
326
		clon0s_tp = np.cos(np.radians(lon0s_tp))
327
		slon0s_tp = np.sin(np.radians(lon0s_tp))
328
		lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1]))
329
		lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi))
330
		logging.info("lon0: %s", lon0)
331
		logging.info("lon0 tp: %s", lon0_tp)
332
		# interpolate to the retrieval latitudes
333
		tg_retr_lats = np.tan(np.radians(sdd.lats))
334
		calc_lons = (tg_retr_lats * PHI_FAC + lon0) % 360.
335
		calc_lons_tp = (tg_retr_lats * PHI_FAC + lon0_tp) % 360.
336
		sdd["lons"] = calc_lons_tp
337
		logging.debug("(calculated) retrieval lons: %s, %s",
338
				calc_lons, calc_lons_tp)
339
	else:
340
		# sdd.lons = sdd.lons % 360.
341
		logging.debug("(original) retrieval lons: %s", sdd.lons)
342
343
	sdd["mst"] = (sdd.utc_hour + sdd.lons / 15.) % 24.
344
	sdd["lst"] = sdd.mst + eotcorr / 60.
345
	mean_alt_km = sdd.alts.mean()
346
347
	dt_date_this = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
348
	logging.info("date: %s", dt_date_this)
349
350
	gmlats, gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=mean_alt_km)
351
	# gmlats, gmlons = apexpy.Apex(dt_date_this).geo2qd(sdd.lats, sdd.lons, mean_alt_km)
352
	sdd["gm_lats"] = ("lats", gmlats)
353
	sdd["gm_lons"] = ("lats", gmlons)
354
	logging.debug("geomag. lats: %s, lons: %s", sdd.gm_lats, sdd.gm_lons)
355
	aacgmgmlats, aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons)
356
	# aacgmgmlats, aacgmgmlons = aacgmv2.convert(sdd.lats, sdd.lons, mean_alt_km, dt_date_this)
357
	sdd["aacgm_gm_lats"] = ("lats", aacgmgmlats)
358
	sdd["aacgm_gm_lons"] = ("lats", aacgmgmlons)
359
	logging.debug("aacgm geomag. lats: %s, lons: %s",
360
			sdd.aacgm_gm_lats, sdd.aacgm_gm_lons)
361
362
	# current day for MSIS input
363
	f107_data = _read_gm(F107_FILE)
364
	f107a_data = _read_gm(F107A_FILE)
365
	ap_data = _read_gm(AP_FILE)
366
	msis_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
367
	msis_dtdate1 = msis_dtdate - dt.timedelta(days=1)
368
	msis_date = msis_dtdate.strftime("%Y-%m-%d").encode()
369
	msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode()
370
	msis_f107 = f107_data[msis_date1]
371
	msis_f107a = f107a_data[msis_date]
372
	msis_ap = ap_data[msis_date]
373
	logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s",
374
			msis_date, msis_f107a, msis_f107, msis_ap)
375
376
	# previous day for NOEM input
377
	f107_adj = _read_gm(F107_ADJ_FILE)
378
	kp_data = _read_gm(KP_FILE)
379
	noem_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0) - 1) + dtrefdate
380
	noem_date = noem_dtdate.strftime("%Y-%m-%d").encode()
381
	noem_f107 = f107_adj[noem_date]
382
	noem_kp = kp_data[noem_date]
383
	logging.debug("NOEM date: %s, f10.7: %s, kp: %s",
384
			noem_date, noem_f107, noem_kp)
385
386
	for var in ["noem_no"]:
387
		if var not in sdd.data_vars:
388
			sdd[var] = xr.zeros_like(sdd.densities)
389
	if "sza" not in sdd.data_vars:
390
		sdd["sza"] = xr.zeros_like(sdd.lats)
391
	if "akdiag" not in sdd.data_vars:
392
		sdd["akdiag"] = xr.full_like(sdd.densities, np.nan)
393
		#akm_filename = glob.glob('{0}_orbit_{1:05d}_*_AKM*'.format(species, orb))[0]
394
		akm_filename = glob.glob(
395
				"{0}/000NO_orbit_{1:05d}_*_AKM*"
396
				.format(dens_path, orbit))[0]
397
		logging.debug("ak file: %s", akm_filename)
398
		ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat)
399
		logging.debug("ak data: %s", ak)
400
		#ak1a = ak.sum(axis = 3)
401
		#dak1a = np.diagonal(ak1a, axis1=0, axis2=2)
402
		sdd["akdiag"] = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1)
403
404
	if msise is not None:
405
		_msis_d_t = msise(
406
			msis_dtdate,
407
			sdd.alts.values[None, :],
408
			sdd.lats.values[:, None],
409
			sdd.lons.values[:, None] % 360.,
410
			msis_f107a, msis_f107, msis_ap,
411
			lst=sdd.lst.values[:, None],
412
		)
413
		if "temperature" not in sdd.data_vars or use_msis:
414
			sdd["temperature"] = xr.zeros_like(sdd.densities)
415
			sdd.temperature[:] = _msis_d_t[:, :, -1]
416
		if "dens_tot" not in sdd.data_vars or use_msis:
417
			sdd["dens_tot"] = xr.zeros_like(sdd.densities)
418
			sdd.dens_tot[:] = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2)
419
	for i, (lat, lon) in enumerate(
420
			zip(sdd.lats.values, sdd.lons.values)):
421
		if noem_cpp is not None:
422
			sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts,
423
					[lat], [lon], noem_f107, noem_kp)[:]
424
		else:
425
			sdd.noem_no[i][:] = np.nan
426
	sdd.sza[:] = solar_zenith_angle(
427
			mean_alt_km,
428
			sdd.lats, sdd.lons,
429
			(pd.to_timedelta(sdd.utc_days.values, unit="days") + dtrefdate).to_pydatetime(),
430
	)
431
	sdd["vmr"] = sdd.densities / sdd.dens_tot * 1.e9  # ppb
432
	# drop unused variables
433
	sdd = sdd.drop(["alt_min", "alt", "alt_max", "lat_min", "lat", "lat_max"])
434
	# time and orbit
435
	sdd = sdd.expand_dims("time")
436
	sdd["time"] = ("time", [dts_retr_interp0])
437
	sdd["orbit"] = ("time", [orbit])
438
	return dts_retr_interp0, time0, lst0, lon0, sdd
439
440
441 View Code Duplication
def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
442
	"""Find SCIAMACHY orbits with retrieved data at a date
443
444
	Parameters
445
	----------
446
	date: str
447
		The date in the format "%Y-%m-%d".
448
	mlt: bool, optional
449
		Look for MLT mode data instead of nominal mode data.
450
		Increases the heuristics to find all MLT orbits.
451
	path: str, optional
452
		The path to the level 2 data. If `None` tries to infer
453
		the data directory from the L2 version using
454
		'./*<L2_version>'. Default: None
455
456
	Returns
457
	-------
458
	orbits: list
459
		List of found orbits with retrieved data files
460
	"""
461
	logging.debug("pre-processing: %s", date)
462
	if path is None:
463
		density_base = os.curdir
464
		path = "{0}/*{1}".format(density_base, L2_version)
465
		logging.debug("path: %s", path)
466
467
	dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format(
468
				path, date.replace("-", "")))
469
	orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles])
470
	if mlt:
471
		orbits.append(orbits[-1] + 1)
472
	return orbits
473
474
475 View Code Duplication
def combine_orbit_data(orbits,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
476
		ref_date="2000-01-01",
477
		L2_version="v6.2",
478
		dens_path=None, spec_base=None,
479
		save_nc=False):
480
	"""Combine post-processed SCIAMACHY retrieved orbit data
481
482
	Parameters
483
	----------
484
	orbits: list
485
		List of SCIAMACHY/Envisat orbit numbers to process.
486
	ref_date: str, optional
487
		Base date to calculate the relative days from,
488
		of the format "%Y-%m-%d". Default: 2000-01-01
489
	L2_version: str, optional
490
		SCIAMACHY level 2 data version to process
491
	dens_path: str, optional
492
		The path to the level 2 data. If `None` tries to infer
493
		the data directory from the L2 version looking for anything
494
		in the current directory that ends in <L2_version>: './*<L2_version>'.
495
		Default: None
496
	spec_base: str, optional
497
		The root path to the level 1c spectra. Uses the current
498
		dir if not set or set to `None` (default).
499
	save_nc: bool, optional
500
		Save the intermediate orbit data sets to netcdf files
501
		for debugging.
502
503
	Returns
504
	-------
505
	(sdday, sdday_ds): tuple
506
		`sdday` contains the combined data as a `scia_density_day` instance,
507
		`sdday_ds` contains the same data as a `xarray.Dataset`.
508
	"""
509
	if dens_path is None:
510
		# try some heuristics
511
		density_base = os.curdir
512
		dens_path = "{0}/*{1}".format(density_base, L2_version)
513
514
	sddayl = []
515
	for orbit in sorted(orbits):
516
		dateo, timeo, lsto, lono, sdens = process_orbit(orbit,
517
				ref_date=ref_date, dens_path=dens_path, spec_base=spec_base)
518
		logging.info(
519
			"orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s",
520
			orbit, dateo, timeo, lsto, lono
521
		)
522
		if sdens is not None:
523
			sddayl.append(sdens)
524
			if save_nc:
525
				sdens.to_netcdf(sdens.filename[:-3] + "nc")
526
	if not sddayl:
527
		return None
528
	return xr.concat(sddayl, dim="time")
529
530
531
VAR_ATTRS = {
532
	"2.1": {
533
		"MSIS_Dens": dict(
534
			units='cm^{-3}',
535
			long_name='total number density (NRLMSIS-00)',
536
		),
537
		"MSIS_Temp": dict(
538
			units='K',
539
			long_name='temperature',
540
			model="NRLMSIS-00",
541
		),
542
	},
543
	"2.2": {
544
	},
545
	"2.3": {
546
		"aacgm_gm_lats": dict(
547
			long_name='geomagnetic_latitude',
548
			model='AACGM2005 at 80 km',
549
			units='degrees_north',
550
		),
551
		"aacgm_gm_lons": dict(
552
			long_name='geomagnetic_longitude',
553
			model='AACGM2005 at 80 km',
554
			units='degrees_east',
555
		),
556
		"orbit": dict(
557
			axis='T', calendar='standard',
558
			long_name='SCIAMACHY/Envisat orbit number',
559
			standard_name="orbit",
560
			units='1',
561
		),
562
	},
563
}
564
VAR_RENAME = {
565
	"2.1": {
566
		# Rename to v2.1 variable names
567
		"MSIS_Dens": "TOT_DENS",
568
		"MSIS_Temp": "temperature",
569
	},
570
	"2.2": {
571
	},
572
	"2.3": {
573
	},
574
}
575
FLOAT_VARS = [
576
	"altitude", "latitude", "longitude",
577
	"app_LST", "mean_LST", "mean_SZA",
578
	"aacgm_gm_lats", "aacgm_gm_lons",
579
	"gm_lats", "gm_lons",
580
]
581
582
583 View Code Duplication
def sddata_set_attrs(
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
584
	sdday_ds,
585
	file_version="2.2",
586
	ref_date="2000-01-01",
587
	rename=True,
588
	species="NO",
589
):
590
	"""Customize xarray Dataset variables and attributes
591
592
	Changes the variable names to match those exported from the
593
	`scia_density_day` class.
594
595
	Parameters
596
	----------
597
	sdday_ds: `xarray.Dataset` instance
598
		The combined dataset.
599
	file_version: string "major.minor", optional
600
		The netcdf file datase version, determines some variable
601
		names and attributes.
602
	ref_date: str, optional
603
		Base date to calculate the relative days from,
604
		of the format "%Y-%m-%d". Default: 2000-01-01
605
	rename: bool, optional
606
		Rename the dataset variables to match the
607
		`scia_density_day` exported ones.
608
		Default: True
609
	species: str, optional
610
		The name of the level 2 species, used to prefix the
611
		dataset variables to be named <species>_<variable>.
612
		Default: "NO".
613
	"""
614
	if rename:
615
		sdday_ds = sdday_ds.rename({
616
			# 2d vars
617
			"akdiag": "{0}_AKDIAG".format(species),
618
			"apriori": "{0}_APRIORI".format(species),
619
			"densities": "{0}_DENS".format(species),
620
			"dens_err_meas": "{0}_ERR".format(species),
621
			"dens_err_tot": "{0}_ETOT".format(species),
622
			"dens_tot": "MSIS_Dens",
623
			"noem_no": "{0}_NOEM".format(species),
624
			"temperature": "MSIS_Temp",
625
			"vmr": "{0}_VMR".format(species),
626
			# 1d vars and dimensions
627
			"alts": "altitude",
628
			"lats": "latitude",
629
			"lons": "longitude",
630
			"lst": "app_LST",
631
			"mst": "mean_LST",
632
			"sza": "mean_SZA",
633
			"utc_hour": "UTC",
634
		})
635
	# relative standard deviation
636
	sdday_ds["{0}_RSTD".format(species)] = 100.0 * np.abs(
637
			sdday_ds["{0}_ERR".format(species)] / sdday_ds["{0}_DENS".format(species)])
638
	# fix coordinate attributes
639
	sdday_ds["time"].attrs = dict(axis='T', standard_name='time',
640
		calendar='standard', long_name='equatorial crossing time',
641
		units="days since {0}".format(
642
			pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")))
643
	sdday_ds["altitude"].attrs = dict(axis='Z', positive='up',
644
		long_name='altitude', standard_name='altitude', units='km')
645
	sdday_ds["latitude"].attrs = dict(axis='Y', long_name='latitude',
646
		standard_name='latitude', units='degrees_north')
647
	# Default variable attributes
648
	sdday_ds["{0}_DENS".format(species)].attrs = {
649
			"units": "cm^{-3}",
650
			"long_name": "{0} number density".format(species)}
651
	sdday_ds["{0}_ERR".format(species)].attrs = {
652
			"units": "cm^{-3}",
653
			"long_name": "{0} density measurement error".format(species)}
654
	sdday_ds["{0}_ETOT".format(species)].attrs = {
655
			"units": "cm^{-3}",
656
			"long_name": "{0} density total error".format(species)}
657
	sdday_ds["{0}_RSTD".format(species)].attrs = dict(
658
			units='%',
659
			long_name='{0} relative standard deviation'.format(species))
660
	sdday_ds["{0}_AKDIAG".format(species)].attrs = dict(
661
			units='1',
662
			long_name='{0} averaging kernel diagonal element'.format(species))
663
	sdday_ds["{0}_APRIORI".format(species)].attrs = dict(
664
			units='cm^{-3}', long_name='{0} apriori density'.format(species))
665
	sdday_ds["{0}_NOEM".format(species)].attrs = dict(
666
			units='cm^{-3}', long_name='NOEM {0} number density'.format(species))
667
	sdday_ds["{0}_VMR".format(species)].attrs = dict(
668
			units='ppb', long_name='{0} volume mixing ratio'.format(species))
669
	sdday_ds["MSIS_Dens"].attrs = dict(units='cm^{-3}',
670
			long_name='MSIS total number density',
671
			model="NRLMSIS-00")
672
	sdday_ds["MSIS_Temp"].attrs = dict(units='K',
673
			long_name='MSIS temperature',
674
			model="NRLMSIS-00")
675
	sdday_ds["longitude"].attrs = dict(long_name='longitude',
676
			standard_name='longitude', units='degrees_east')
677
	sdday_ds["app_LST"].attrs = dict(units='hours',
678
			long_name='apparent local solar time')
679
	sdday_ds["mean_LST"].attrs = dict(units='hours',
680
			long_name='mean local solar time')
681
	sdday_ds["mean_SZA"].attrs = dict(units='degrees',
682
			long_name='solar zenith angle at mean altitude')
683
	sdday_ds["UTC"].attrs = dict(units='hours',
684
			long_name='measurement utc time')
685
	sdday_ds["utc_days"].attrs = dict(
686
			units='days since {0}'.format(
687
				pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")),
688
			long_name='measurement utc day')
689
	sdday_ds["gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
690
			model='IGRF', units='degrees_north')
691
	sdday_ds["gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
692
			model='IGRF', units='degrees_east')
693
	sdday_ds["aacgm_gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
694
			# model='AACGM2005 80 km',  # v2.3
695
			model='AACGM',  # v2.1, v2.2
696
			units='degrees_north')
697
	sdday_ds["aacgm_gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
698
			# model='AACGM2005 80 km',  # v2.3
699
			model='AACGM',  # v2.1, v2.2
700
			units='degrees_east')
701
	sdday_ds["orbit"].attrs = dict(
702
			axis='T', calendar='standard',
703
			# long_name='SCIAMACHY/Envisat orbit number',  # v2.3
704
			long_name='orbit',  # v2.1, v2.2
705
			standard_name="orbit",
706
			# units='1',  # v2.3
707
			units='orbit number',  # v2.1, v2.2
708
	)
709
	# Overwrite version-specific variable attributes
710
	for _v, _a in VAR_ATTRS[file_version].items():
711
		sdday_ds[_v].attrs = _a
712
	if rename:
713
		# version specific renaming
714
		sdday_ds = sdday_ds.rename(VAR_RENAME[file_version])
715
	if int(file_version.split(".")[0]) < 3:
716
		# invert latitudes for backwards-compatitbility
717
		sdday_ds = sdday_ds.sortby("latitude", ascending=False)
718
	else:
719
		sdday_ds = sdday_ds.sortby("latitude", ascending=True)
720
721
	# for var in FLOAT_VARS:
722
	# 	_attrs = sdday_ds[var].attrs
723
	# 	sdday_ds[var] = sdday_ds[var].astype('float32')
724
	# 	sdday_ds[var].attrs = _attrs
725
726
	dateo = pd.to_datetime(
727
			xr.conventions.decode_cf_variable("date", sdday_ds.time).data[0],
728
			utc=True,
729
	).strftime("%Y-%m-%d")
730
	logging.debug("date %s dataset: %s", dateo, sdday_ds)
731
	return sdday_ds
732
733
734 View Code Duplication
def main():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
735
	"""SCIAMACHY level 2 post processing
736
	"""
737
	logging.basicConfig(level=logging.WARNING,
738
			format="[%(levelname)-8s] (%(asctime)s) "
739
			"%(filename)s:%(lineno)d %(message)s",
740
			datefmt="%Y-%m-%d %H:%M:%S %z")
741
742
	parser = ap.ArgumentParser()
743
	parser.add_argument("file", default="SCIA_NO.nc",
744
			help="the filename of the output netcdf file")
745
	parser.add_argument("-M", "--month", metavar="YEAR-MM",
746
			help="infer start and end dates for month")
747
	parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE",
748
			help="colon-separated start and end dates")
749
	parser.add_argument("-d", "--dates", help="comma-separated list of dates")
750
	parser.add_argument("-B", "--base_date",
751
			metavar="YEAR-MM-DD", default="2000-01-01",
752
			help="Reference date to base the time values (days) on "
753
			"(default: %(default)s).")
754
	parser.add_argument("-f", "--orbit_file",
755
			help="the file containing the input orbits")
756
	parser.add_argument("-r", "--retrieval_version", default="v6.2",
757
			help="SCIAMACHY level 2 data version to process")
758
	parser.add_argument("-R", "--file_version", default="2.2",
759
			help="Postprocessing format version of the output file")
760
	parser.add_argument("-A", "--author", default="unknown",
761
			help="Author of the post-processed data set "
762
			"(default: %(default)s)")
763
	parser.add_argument("-p", "--path", default=None,
764
			help="path containing the L2 data")
765
	parser.add_argument("-s", "--spectra", default=None, metavar="PATH",
766
			help="path containing the L1c spectra")
767
	parser.add_argument("-m", "--mlt", action="store_true", default=False,
768
			help="indicate nominal (False, default) or MLT data (True)")
769
	parser.add_argument("-X", "--xarray", action="store_true", default=False,
770
			help="DEPRECATED, kept for compatibility reasons, does nothing.")
771
	loglevels = parser.add_mutually_exclusive_group()
772
	loglevels.add_argument("-l", "--loglevel", default="WARNING",
773
			choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
774
			help="change the loglevel (default: 'WARNING')")
775
	loglevels.add_argument("-q", "--quiet", action="store_true", default=False,
776
			help="less output, same as --loglevel=ERROR (default: False)")
777
	loglevels.add_argument("-v", "--verbose", action="store_true", default=False,
778
			help="verbose output, same as --loglevel=INFO (default: False)")
779
	args = parser.parse_args()
780
	if args.quiet:
781
		logging.getLogger().setLevel(logging.ERROR)
782
	elif args.verbose:
783
		logging.getLogger().setLevel(logging.INFO)
784
	else:
785
		logging.getLogger().setLevel(args.loglevel)
786
787
	logging.info("processing L2 version: %s", args.retrieval_version)
788
	logging.info("writing data file version: %s", args.file_version)
789
790
	pddrange = []
791
	if args.month is not None:
792
		d0 = pd.to_datetime(args.month + "-01", utc=True)
793
		pddrange += pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd())
794
	if args.date_range is not None:
795
		pddrange += pd.date_range(*args.date_range.split(':'))
796
	if args.dates is not None:
797
		pddrange += pd.to_datetime(args.dates.split(','), utc=True)
798
	logging.debug("pddrange: %s", pddrange)
799
800
	olist = []
801
	for date in pddrange:
802
		try:
803
			olist += get_orbits_from_date(date.strftime("%Y-%m-%d"),
804
				mlt=args.mlt, path=args.path, L2_version=args.retrieval_version)
805
		except:  # handle NaT
806
			pass
807
	if args.orbit_file is not None:
808
		olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist()
809
	logging.debug("olist: %s", olist)
810
811
	if not olist:
812
		logging.warn("No orbits to process.")
813
		return
814
815
	sd_xr = combine_orbit_data(olist,
816
			ref_date=args.base_date,
817
			L2_version=args.retrieval_version,
818
			dens_path=args.path, spec_base=args.spectra, save_nc=False)
819
820
	if sd_xr is None:
821
		logging.warn("Processed data is empty.")
822
		return
823
824
	sd_xr = sddata_set_attrs(sd_xr, ref_date=args.base_date, file_version=args.file_version)
825
	sd_xr = sd_xr[sorted(sd_xr.variables)]
826
	sd_xr.attrs["author"] = args.author
827
	sd_xr.attrs["creation_time"] = dt.datetime.utcnow().strftime(
828
			"%a %b %d %Y %H:%M:%S +00:00 (UTC)")
829
	sd_xr.attrs["software"] = "sciapy {0}".format(__version__)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable __version__ does not seem to be defined.
Loading history...
830
	sd_xr.attrs["L2_data_version"] = args.retrieval_version
831
	sd_xr.attrs["version"] = args.file_version
832
	print(sd_xr)
833
	sd_xr.to_netcdf(args.file, unlimited_dims=["time"])
834
835
836
if __name__ == "__main__":
837
	main()
838