Passed
Push — master ( fa390c...ed9f5a )
by Stefan
08:48
created

scia_post_process_l2.sddata_xr_set_attrs()   C

Complexity

Conditions 5

Size

Total Lines 141
Code Lines 100

Duplication

Lines 141
Ratio 100 %

Importance

Changes 0
Metric Value
cc 5
eloc 100
nop 5
dl 141
loc 141
rs 6.5333
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
#!/usr/bin/env python
2
# vim: set fileencoding=utf-8
3
"""SCIAMACHY level 2 data post processing
4
5
Main script for SCIAMACHY orbital retrieval post processing
6
and data combining (to netcdf).
7
8
Copyright (c) 2018 Stefan Bender
9
10
This file is part of sciapy.
11
sciapy is free software: you can redistribute it or modify it
12
under the terms of the GNU General Public License as published by
13
the Free Software Foundation, version 2.
14
See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
15
"""
16
17
from __future__ import absolute_import, division, print_function
18
19
import glob
20
import os
21
import argparse as ap
22
import datetime as dt
23
import logging
24
from pkg_resources import resource_filename
25
26
import numpy as np
27
import pandas as pd
28
import xarray as xr
29
from scipy.interpolate import interp1d
30
31
from astropy import units
32
from astropy.time import Time
33
import astropy.coordinates as coord
34
35
import sciapy.level1c as sl
36
from sciapy.level2 import density_pp as sd
37
from sciapy.level2 import scia_akm as sa
38
from sciapy.level2.igrf import gmag_igrf
39
from sciapy.level2.aacgm2005 import gmag_aacgm2005
40
try:
41
	from nrlmsise00 import msise_flat as msise
42
except ImportError:
43
	msise = None
44
try:
45
	from sciapy.level2.noem import noem_cpp
46
except ImportError:
47
	noem_cpp = None
48
49
F107_FILE = resource_filename("sciapy", "data/indices/f107_noontime_flux_obs.txt")
50
F107A_FILE = resource_filename("sciapy", "data/indices/f107a_noontime_flux_obs.txt")
51
AP_FILE = resource_filename("sciapy", "data/indices/spidr_ap_2000-2012.dat")
52
F107_ADJ_FILE = resource_filename("sciapy", "data/indices/spidr_f107_2000-2012.dat")
53
KP_FILE = resource_filename("sciapy", "data/indices/spidr_kp_2000-2012.dat")
54
55
PHI_FAC = 11.91
56
LST_FAC = -0.62
57
58
59 View Code Duplication
def solar_zenith_angle(alt, lat, lon, time):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
60
	atime = Time(time)
61
	loc = coord.EarthLocation.from_geodetic(
62
		height=alt * units.km,
63
		lat=lat * units.deg,
64
		lon=lon * units.deg,
65
	)
66
	altaz = coord.AltAz(location=loc, obstime=atime)
67
	sun = coord.get_sun(atime)
68
	return sun.transform_to(altaz).zen.value
69
70
71 View Code Duplication
def read_spectra(year, orbit, spec_base=None, skip_upleg=True):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
72
	"""Read and examine SCIAMACHY orbit spectra
73
74
	Reads the limb spactra and extracts the dates, times, latitudes,
75
	longitudes to be used to re-assess the retrieved geolocations.
76
77
	Parameters
78
	----------
79
	year: int
80
		The measurement year to select the corresponding subdir
81
		below `spec_base` (see below).
82
	orbit: int
83
		SCIAMACHY/Envisat orbit number of the spectra.
84
	spec_base: str, optional
85
		The root path to the level 1c spectra. Uses the current
86
		dir if not set or set to `None` (default).
87
	skip_upleg: bool, optional
88
		Skip upleg limb scans, i.e. night time scans. For NO retrievals,
89
		those are not used and should be not used here.
90
		Default: True
91
92
	Returns
93
	-------
94
	(dts, times, lats, lons, mlsts, alsts, eotcorr)
95
	"""
96
	fail = (None,) * 7
97
	if spec_base is None:
98
		spec_base = os.curdir
99
	spec_path = os.path.join(spec_base, "{0}".format(year))
100
	spec_path2 = os.path.join(spec_base, "{0}".format(int(year) + 1))
101
	logging.debug("spec_path: %s", spec_path)
102
	logging.debug("spec_path2: %s", spec_path)
103
	if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)):
104
		return fail
105
106
	# the star stands for the (optional) date subdir
107
	# to find all spectra for the orbit
108
	spfiles = glob.glob(
109
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
110
			.format(spec_path, orbit))
111
	# sometimes for whatever reason the orbit ends up in the wrong year subdir
112
	# looks in the subdir for the following year as well.
113
	spfiles += glob.glob(
114
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
115
			.format(spec_path2, orbit))
116
	if len(spfiles) < 2:
117
		return fail
118
119
	dts = []
120
	times = []
121
	lats = []
122
	lons = []
123
	mlsts = []
124
	alsts = []
125
126
	sls = sl.scia_limb_scan()
127
	for f in sorted(spfiles):
128
		sls.read_from_file(f)
129
		# copy the values from the l1c file
130
		lat, lon = sls.cent_lat_lon[:2]
131
		mlst, alst, eotcorr = sls.local_solar_time(False)
132
		tp_lats = sls.limb_data.tp_lat
133
		date = sls.date
134
		# debug output if requested
135
		logging.debug("file: %s", f)
136
		logging.debug("lat: %s, lon: %s", lat, lon)
137
		logging.debug("mlst: %s, alst: %s, eotcorr: %s", mlst, alst, eotcorr)
138
		logging.debug("tp_lats: %s", tp_lats)
139
		logging.debug("date: %s", date)
140
		if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5):
141
			# Exclude non-downleg measurements where the latitude
142
			# of the last real tangent point (the last is dark sky)
143
			# is larger than or too close to the first latitude.
144
			# Requires an (empirical) separation of +0.5 degree.
145
			logging.debug("excluding upleg point at: %s, %s", lat, lon)
146
			continue
147
		dtdate = pd.to_datetime(dt.datetime(*date), utc=True)
148
		time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0
149
		logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr)
150
		dts.append(dtdate)
151
		times.append(time_hour)
152
		lats.append(lat)
153
		lons.append(lon)
154
		mlsts.append(mlst)
155
		alsts.append(alst)
156
157
	if len(lats) < 2:
158
		# interpolation will fail with less than 2 points
159
		return fail
160
161
	return (np.asarray(dts),
162
			np.asarray(times),
163
			np.asarray(lats),
164
			np.asarray(lons),
165
			np.asarray(mlsts),
166
			np.asarray(alsts), eotcorr)
0 ignored issues
show
introduced by
The variable eotcorr does not seem to be defined in case the for loop on line 127 is not entered. Are you sure this can never be the case?
Loading history...
167
168
169
class _circ_interp(object):
170
	"""Interpolation on a circle"""
171
	def __init__(self, x, y, **kw):
172
		self.c_intpf = interp1d(x, np.cos(y), **kw)
173
		self.s_intpf = interp1d(x, np.sin(y), **kw)
174
175
	def __call__(self, x):
176
		return np.arctan2(self.s_intpf(x), self.c_intpf(x))
177
178
179 View Code Duplication
def process_orbit(
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
180
	orbit,
181
	ref_date="2000-01-01",
182
	dens_path=None,
183
	spec_base=None,
184
	use_msis=True,
185
):
186
	"""Post process retrieved SCIAMACHY orbit
187
188
	Parameters
189
	----------
190
	orbit: int
191
		SCIAMACHY/Envisat orbit number of the results to process.
192
	ref_date: str, optional
193
		Base date to calculate the relative days from,
194
		of the format "%Y-%m-%d". Default: 2000-01-01
195
	dens_path: str, optional
196
		The path to the level 2 data. Uses the current
197
		dir if not set or set to `None` (default).
198
	spec_base: str, optional
199
		The root path to the level 1c spectra. Uses the current
200
		dir if not set or set to `None` (default).
201
202
	Returns
203
	-------
204
	(dts0, time0, lst0, lon0, sdd): tuple
205
		dts0 - days since ref_date at equator crossing (float)
206
		time0 - utc hour into the day at equator crossing (float)
207
		lst0 - apparent local solar time at the equator (float)
208
		lon0 - longitude of the equator crossing (float)
209
		sdd - `scia_density_pp` instance of the post-processed data
210
	"""
211
	def _read_gm(fname):
212
		return dict(np.genfromtxt(fname, usecols=[0, 2], dtype=None))
213
214
	fail = (None,) * 5
215
	logging.debug("processing orbit: %s", orbit)
216
	dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True)
217
218
	dfiles = glob.glob(
219
			"{0}/000NO_orbit_{1:05d}_*_Dichten.txt"
220
			.format(dens_path, orbit))
221
	if len(dfiles) < 1:
222
		return fail
223
	logging.debug("dfiles: %s", dfiles)
224
	logging.debug("splits: %s", [fn.split('/') for fn in dfiles])
225
	ddict = dict([(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4]))
226
				for fn in dfiles])
227
	logging.debug("ddict: %s", ddict)
228
	year = ddict[sorted(ddict.keys())[0]][-1][:4]
229
	logging.debug("year: %s", year)
230
231
	dts, times, lats, lons, mlsts, alsts, eotcorr = \
232
		read_spectra(year, orbit, spec_base)
233
	if dts is None:
234
		# return early if reading the spectra failed
235
		return fail
236
237
	dts = pd.to_datetime(dts, utc=True) - dtrefdate
238
	dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts])
239
	logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times)
240
241
	sdd = sd.scia_densities_pp(ref_date=ref_date)
242
	sdd.read_from_file(dfiles[0])
243
	logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons)
244
245
	# Re-interpolates the location (longitude) and times from the
246
	# limb scan spectra files along the orbit to determine the values
247
	# at the Equator and to fill in possibly missing data.
248
	#
249
	# y values are unit circle angles in radians (0 < φ < 2π or -π < φ < π)
250
	# longitudes
251
	lons_intpf = _circ_interp(
252
		lats[::-1], np.radians(lons[::-1]),
253
		fill_value="extrapolate",
254
	)
255
	# apparent local solar time (EoT corrected)
256
	lst_intpf = _circ_interp(
257
		lats[::-1], np.pi / 12. * alsts[::-1],
258
		fill_value="extrapolate",
259
	)
260
	# mean local solar time
261
	mst_intpf = _circ_interp(
262
		lats[::-1], np.pi / 12. * mlsts[::-1],
263
		fill_value="extrapolate",
264
	)
265
	# utc time (day)
266
	time_intpf = _circ_interp(
267
		lats[::-1], np.pi / 12. * times[::-1],
268
		fill_value="extrapolate",
269
	)
270
	# datetime
271
	dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate")
272
273
	# equator values
274
	lon0 = np.degrees(lons_intpf(0.)) % 360.
275
	lst0 = (lst_intpf(0.) * 12. / np.pi) % 24.
276
	mst0 = (mst_intpf(0.) * 12. / np.pi) % 24.
277
	time0 = (time_intpf(0.) * 12. / np.pi) % 24.
278
	dts_retr_interp0 = dts_retr_interpf(0.)
279
	logging.debug("utc day at equator: %s", dts_retr_interp0)
280
	logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0)
281
282
	sdd.utchour = (time_intpf(sdd.lats) * 12. / np.pi) % 24.
283
	sdd.utcdays = dts_retr_interpf(sdd.lats)
284
285
	if sdd.lons is None:
286
		# recalculate the longitudes
287
		# estimate the equatorial longitude from the
288
		# limb scan latitudes and longitudes
289
		lon0s_tp = lons - PHI_FAC * np.tan(np.radians(lats))
290
		clon0s_tp = np.cos(np.radians(lon0s_tp))
291
		slon0s_tp = np.sin(np.radians(lon0s_tp))
292
		lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1]))
293
		lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi))
294
		logging.info("lon0: %s", lon0)
295
		logging.info("lon0 tp: %s", lon0_tp)
296
		# interpolate to the retrieval latitudes
297
		tg_retr_lats = np.tan(np.radians(sdd.lats))
298
		calc_lons = (tg_retr_lats * PHI_FAC + lon0) % 360.
299
		calc_lons_tp = (tg_retr_lats * PHI_FAC + lon0_tp) % 360.
300
		sdd.lons = calc_lons_tp
301
		logging.debug("(calculated) retrieval lons: %s, %s",
302
				calc_lons, calc_lons_tp)
303
	else:
304
		# sdd.lons = sdd.lons % 360.
305
		logging.debug("(original) retrieval lons: %s", sdd.lons)
306
307
	sdd.mst = (sdd.utchour + sdd.lons / 15.) % 24.
308
	sdd.lst = sdd.mst + eotcorr / 60.
309
	mean_alt_km = sdd.alts.mean()
310
311
	dt_date_this = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
312
	logging.info("date: %s", dt_date_this)
313
	# caclulate geomagnetic coordinates
314
	sdd.gmlats, sdd.gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=mean_alt_km)
315
	logging.debug("geomag. lats: %s, lons: %s", sdd.gmlats, sdd.gmlons)
316
	sdd.aacgmgmlats, sdd.aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons)
317
	logging.debug("aacgm geomag. lats: %s, lons: %s",
318
			sdd.aacgmgmlats, sdd.aacgmgmlons)
319
320
	# current day for MSIS input
321
	f107_data = _read_gm(F107_FILE)
322
	f107a_data = _read_gm(F107A_FILE)
323
	ap_data = _read_gm(AP_FILE)
324
	msis_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
325
	msis_dtdate1 = msis_dtdate - dt.timedelta(days=1)
326
	msis_date = msis_dtdate.strftime("%Y-%m-%d").encode()
327
	msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode()
328
	msis_f107 = f107_data[msis_date1]
329
	msis_f107a = f107a_data[msis_date]
330
	msis_ap = ap_data[msis_date]
331
	logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s",
332
			msis_date, msis_f107a, msis_f107, msis_ap)
333
334
	# previous day for NOEM input
335
	f107_adj = _read_gm(F107_ADJ_FILE)
336
	kp_data = _read_gm(KP_FILE)
337
	noem_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0) - 1) + dtrefdate
338
	noem_date = noem_dtdate.strftime("%Y-%m-%d").encode()
339
	noem_f107 = f107_adj[noem_date]
340
	noem_kp = kp_data[noem_date]
341
	logging.debug("NOEM date: %s, f10.7: %s, kp: %s",
342
			noem_date, noem_f107, noem_kp)
343
344
	if sdd.noem_no is None:
345
		sdd.noem_no = np.zeros_like(sdd.densities)
346
	if sdd.temperature is None and msise is None:
347
		sdd.temperature = np.full_like(sdd.densities, np.nan)
348
	if sdd.sza is None:
349
		sdd.sza = np.zeros_like(sdd.lats)
350
	if sdd.akdiag is None:
351
		sdd.akdiag = np.zeros_like(sdd.densities)
352
		akm_filename = glob.glob(
353
				"{0}/000NO_orbit_{1:05d}_*_AKM*"
354
				.format(dens_path, orbit))[0]
355
		logging.debug("ak file: %s", akm_filename)
356
		ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat)
357
		logging.debug("ak data: %s", ak)
358
		sdd.akdiag = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1)
359
360
	if msise is not None:
361
		if sdd.temperature is None or use_msis:
362
			_msis_d_t = msise(
363
				msis_dtdate,
364
				sdd.alts[None, :], sdd.lats[:, None], sdd.lons[:, None] % 360.,
365
				msis_f107a, msis_f107, msis_ap,
366
				lst=sdd.lst[:, None],
367
			)
368
			sdd.temperature = _msis_d_t[:, :, -1]
369
		if use_msis:
370
			sdd.dens_tot = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2)
0 ignored issues
show
introduced by
The variable _msis_d_t does not seem to be defined in case sdd.temperature is None or use_msis on line 361 is False. Are you sure this can never be the case?
Loading history...
371
	for i, lat in enumerate(sdd.lats):
372
		if noem_cpp is not None:
373
			sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts,
374
					[lat], [sdd.lons[i]], noem_f107, noem_kp)[:]
375
		else:
376
			sdd.noem_no[i][:] = np.nan
377
	sdd.sza[:] = solar_zenith_angle(
378
			mean_alt_km,
379
			sdd.lats, sdd.lons,
380
			(pd.to_timedelta(sdd.utcdays, unit="days") + dtrefdate).to_pydatetime(),
381
	)
382
	sdd.vmr = sdd.densities / sdd.dens_tot * 1.e9  # ppb
383
	return dts_retr_interp0, time0, lst0, lon0, sdd
384
385
386 View Code Duplication
def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
387
	"""Find SCIAMACHY orbits with retrieved data at a date
388
389
	Parameters
390
	----------
391
	date: str
392
		The date in the format "%Y-%m-%d".
393
	mlt: bool, optional
394
		Look for MLT mode data instead of nominal mode data.
395
		Increases the heuristics to find all MLT orbits.
396
	path: str, optional
397
		The path to the level 2 data. If `None` tries to infer
398
		the data directory from the L2 version using
399
		'./*<L2_version>'. Default: None
400
401
	Returns
402
	-------
403
	orbits: list
404
		List of found orbits with retrieved data files
405
	"""
406
	logging.debug("pre-processing: %s", date)
407
	if path is None:
408
		density_base = os.curdir
409
		path = "{0}/*{1}".format(density_base, L2_version)
410
		logging.debug("path: %s", path)
411
412
	dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format(
413
				path, date.replace("-", "")))
414
	orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles])
415
	if mlt:
416
		orbits.append(orbits[-1] + 1)
417
	return orbits
418
419
420 View Code Duplication
def combine_orbit_data(orbits,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
421
		ref_date="2000-01-01",
422
		L2_version="v6.2", file_version="2.2",
423
		dens_path=None, spec_base=None,
424
		use_xarray=False, save_nc=False):
425
	"""Combine post-processed SCIAMACHY retrieved orbit data
426
427
	Parameters
428
	----------
429
	orbits: list
430
		List of SCIAMACHY/Envisat orbit numbers to process.
431
	ref_date: str, optional
432
		Base date to calculate the relative days from,
433
		of the format "%Y-%m-%d". Default: 2000-01-01
434
	L2_version: str, optional
435
		SCIAMACHY level 2 data version to process
436
	file_version: str, optional
437
		Postprocessing format version of the output data
438
	dens_path: str, optional
439
		The path to the level 2 data. If `None` tries to infer
440
		the data directory from the L2 version looking for anything
441
		in the current directory that ends in <L2_version>: './*<L2_version>'.
442
		Default: None
443
	spec_base: str, optional
444
		The root path to the level 1c spectra. Uses the current
445
		dir if not set or set to `None` (default).
446
	use_xarray: bool, optional
447
		Uses xarray (if available) to combine the orbital data.
448
	save_nc: bool, optional
449
		Save the intermediate orbit data sets to netcdf files
450
		for debugging.
451
452
	Returns
453
	-------
454
	(sdday, sdday_ds): tuple
455
		`sdday` contains the combined data as a `scia_density_day` instance,
456
		`sdday_ds` contains the same data as a `xarray.Dataset`.
457
	"""
458
	if dens_path is None:
459
		# try some heuristics
460
		density_base = os.curdir
461
		dens_path = "{0}/*{1}".format(density_base, L2_version)
462
463
	sdday = sd.scia_density_day(ref_date=ref_date)
464
	sddayl = []
465
	sdday_ds = None
466
	for orbit in sorted(orbits):
467
		dateo, timeo, lsto, lono, sdens = process_orbit(orbit,
468
				ref_date=ref_date, dens_path=dens_path, spec_base=spec_base)
469
		logging.info(
470
			"orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s",
471
			orbit, dateo, timeo, lsto, lono
472
		)
473
		if sdens is not None:
474
			sdens.version = file_version
475
			sdens.data_version = L2_version
476
			sdday.append_data(dateo, orbit, timeo, sdens)
477
			if use_xarray:
478
				sd_xr = sdens.to_xarray(dateo, orbit)
479
				if sd_xr is not None:
480
					logging.debug("orbit %s dataset: %s", orbit, sd_xr)
481
					sddayl.append(sd_xr)
482
			if save_nc:
483
				sdens.write_to_netcdf(sdens.filename[:-3] + "nc")
484
	if use_xarray and sddayl:
485
		sdday_ds = xr.concat(sddayl, dim="time")
486
	return sdday, sdday_ds
487
488
489
VAR_ATTRS = {
490
	"2.1": {
491
		"MSIS_Dens": dict(
492
			units='cm^{-3}',
493
			long_name='total number density (NRLMSIS-00)',
494
		),
495
		"MSIS_Temp": dict(
496
			units='K',
497
			long_name='temperature',
498
			model="NRLMSIS-00",
499
		),
500
	},
501
	"2.2": {
502
	},
503
	"2.3": {
504
		"aacgm_gm_lats": dict(
505
			long_name='geomagnetic_latitude',
506
			model='AACGM2005 at 80 km',
507
			units='degrees_north',
508
		),
509
		"aacgm_gm_lons": dict(
510
			long_name='geomagnetic_longitude',
511
			model='AACGM2005 at 80 km',
512
			units='degrees_east',
513
		),
514
		"orbit": dict(
515
			axis='T', calendar='standard',
516
			long_name='SCIAMACHY/Envisat orbit number',
517
			standard_name="orbit",
518
			units='1',
519
		),
520
	},
521
}
522
VAR_RENAME = {
523
	"2.1": {
524
		# Rename to v2.1 variable names
525
		"MSIS_Dens": "TOT_DENS",
526
		"MSIS_Temp": "temperature",
527
	},
528
	"2.2": {
529
	},
530
	"2.3": {
531
	},
532
}
533
FLOAT_VARS = [
534
	"altitude", "latitude", "longitude",
535
	"app_LST", "mean_LST", "mean_SZA",
536
	"aacgm_gm_lats", "aacgm_gm_lons",
537
	"gm_lats", "gm_lons",
538
]
539
540
541 View Code Duplication
def sddata_xr_set_attrs(
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
542
	sdday_xr,
543
	file_version="2.2",
544
	ref_date="2000-01-01",
545
	rename=True,
546
	species="NO",
547
):
548
	"""Customize xarray Dataset variables and attributes
549
550
	Changes the variable names to match those exported from the
551
	`scia_density_day` class.
552
553
	Parameters
554
	----------
555
	sdday_xr: `xarray.Dataset` instance
556
		The combined dataset.
557
	file_version: string "major.minor", optional
558
		The netcdf file datase version, determines some variable
559
		names and attributes.
560
	ref_date: str, optional
561
		Base date to calculate the relative days from,
562
		of the format "%Y-%m-%d". Default: 2000-01-01
563
	rename: bool, optional
564
		Rename the dataset variables to match the
565
		`scia_density_day` exported ones.
566
		Default: True
567
	species: str, optional
568
		The name of the level 2 species, used to prefix the
569
		dataset variables to be named <species>_<variable>.
570
		Default: "NO".
571
	"""
572
	if rename:
573
		sdday_xr = sdday_xr.rename({
574
			# 2d vars
575
			"akm_diagonal": "{0}_AKDIAG".format(species),
576
			"apriori": "{0}_APRIORI".format(species),
577
			"density": "{0}_DENS".format(species),
578
			"density_air": "MSIS_Dens",
579
			"error_meas": "{0}_ERR".format(species),
580
			"error_tot": "{0}_ETOT".format(species),
581
			"temperature": "MSIS_Temp",
582
			"NOEM_density": "{0}_NOEM".format(species),
583
			"VMR": "{0}_VMR".format(species),
584
			# 1d vars and dimensions
585
			"app_lst": "app_LST",
586
			"mean_lst": "mean_LST",
587
			"mean_sza": "mean_SZA",
588
			"utc_hour": "UTC",
589
		})
590
	# relative standard deviation
591
	sdday_xr["{0}_RSTD".format(species)] = 100.0 * np.abs(
592
			sdday_xr["{0}_ERR".format(species)] / sdday_xr["{0}_DENS".format(species)])
593
	# fix coordinate attributes
594
	sdday_xr["time"].attrs = dict(axis='T', standard_name='time',
595
		calendar='standard', long_name='equatorial crossing time',
596
		units="days since {0}".format(
597
			pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")))
598
	sdday_xr["altitude"].attrs = dict(axis='Z', positive='up',
599
		long_name='altitude', standard_name='altitude', units='km')
600
	sdday_xr["latitude"].attrs = dict(axis='Y', long_name='latitude',
601
		standard_name='latitude', units='degrees_north')
602
	# Default variable attributes
603
	sdday_xr["{0}_DENS".format(species)].attrs = {
604
			"units": "cm^{-3}",
605
			"long_name": "{0} number density".format(species)}
606
	sdday_xr["{0}_ERR".format(species)].attrs = {
607
			"units": "cm^{-3}",
608
			"long_name": "{0} density measurement error".format(species)}
609
	sdday_xr["{0}_ETOT".format(species)].attrs = {
610
			"units": "cm^{-3}",
611
			"long_name": "{0} density total error".format(species)}
612
	sdday_xr["{0}_RSTD".format(species)].attrs = dict(
613
			units='%',
614
			long_name='{0} relative standard deviation'.format(species))
615
	sdday_xr["{0}_AKDIAG".format(species)].attrs = dict(
616
			units='1',
617
			long_name='{0} averaging kernel diagonal element'.format(species))
618
	sdday_xr["{0}_APRIORI".format(species)].attrs = dict(
619
			units='cm^{-3}', long_name='{0} apriori density'.format(species))
620
	sdday_xr["{0}_NOEM".format(species)].attrs = dict(
621
			units='cm^{-3}', long_name='NOEM {0} number density'.format(species))
622
	sdday_xr["{0}_VMR".format(species)].attrs = dict(
623
			units='ppb', long_name='{0} volume mixing ratio'.format(species))
624
	sdday_xr["MSIS_Dens"].attrs = dict(units='cm^{-3}',
625
			long_name='MSIS total number density',
626
			model="NRLMSIS-00")
627
	sdday_xr["MSIS_Temp"].attrs = dict(units='K',
628
			long_name='MSIS temperature',
629
			model="NRLMSIS-00")
630
	sdday_xr["longitude"].attrs = dict(long_name='longitude',
631
			standard_name='longitude', units='degrees_east')
632
	sdday_xr["app_LST"].attrs = dict(units='hours',
633
			long_name='apparent local solar time')
634
	sdday_xr["mean_LST"].attrs = dict(units='hours',
635
			long_name='mean local solar time')
636
	sdday_xr["mean_SZA"].attrs = dict(units='degrees',
637
			long_name='solar zenith angle at mean altitude')
638
	sdday_xr["UTC"].attrs = dict(units='hours',
639
			long_name='measurement utc time')
640
	sdday_xr["utc_days"].attrs = dict(
641
			units='days since {0}'.format(
642
				pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")),
643
			long_name='measurement utc day')
644
	sdday_xr["gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
645
			model='IGRF', units='degrees_north')
646
	sdday_xr["gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
647
			model='IGRF', units='degrees_east')
648
	sdday_xr["aacgm_gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
649
			# model='AACGM2005 80 km',  # v2.3
650
			model='AACGM',  # v2.1, v2.2
651
			units='degrees_north')
652
	sdday_xr["aacgm_gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
653
			# model='AACGM2005 80 km',  # v2.3
654
			model='AACGM',  # v2.1, v2.2
655
			units='degrees_east')
656
	sdday_xr["orbit"].attrs = dict(
657
			axis='T', calendar='standard',
658
			# long_name='SCIAMACHY/Envisat orbit number',  # v2.3
659
			long_name='orbit',  # v2.1, v2.2
660
			standard_name="orbit",
661
			# units='1',  # v2.3
662
			units='orbit number',  # v2.1, v2.2
663
	)
664
	# Overwrite version-specific variable attributes
665
	for _v, _a in VAR_ATTRS[file_version].items():
666
		sdday_xr[_v].attrs = _a
667
	if rename:
668
		# version specific renaming
669
		sdday_xr = sdday_xr.rename(VAR_RENAME[file_version])
670
	if int(file_version.split(".")[0]) < 3:
671
		# invert latitudes for backwards-compatitbility
672
		sdday_xr = sdday_xr.sortby("latitude", ascending=False)
673
	else:
674
		sdday_xr = sdday_xr.sortby("latitude", ascending=True)
675
676
	dateo = pd.to_datetime(
677
			xr.conventions.decode_cf_variable("date", sdday_xr.time).data[0],
678
			utc=True,
679
	).strftime("%Y-%m-%d")
680
	logging.debug("date %s dataset: %s", dateo, sdday_xr)
681
	return sdday_xr
682
683
684 View Code Duplication
def main():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
685
	logging.basicConfig(level=logging.WARNING,
686
			format="[%(levelname)-8s] (%(asctime)s) "
687
			"%(filename)s:%(lineno)d %(message)s",
688
			datefmt="%Y-%m-%d %H:%M:%S %z")
689
690
	parser = ap.ArgumentParser()
691
	parser.add_argument("file", default="SCIA_NO.nc",
692
			help="the filename of the output netcdf file")
693
	parser.add_argument("-M", "--month", metavar="YEAR-MM",
694
			help="infer start and end dates for month")
695
	parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE",
696
			help="colon-separated start and end dates")
697
	parser.add_argument("-d", "--dates", help="comma-separated list of dates")
698
	parser.add_argument("-B", "--base_date",
699
			metavar="YEAR-MM-DD", default="2000-01-01",
700
			help="Reference date to base the time values (days) on "
701
			"(default: %(default)s).")
702
	parser.add_argument("-f", "--orbit_file",
703
			help="the file containing the input orbits")
704
	parser.add_argument("-r", "--retrieval_version", default="v6.2",
705
			help="SCIAMACHY level 2 data version to process")
706
	parser.add_argument("-R", "--file_version", default="2.2",
707
			help="Postprocessing format version of the output file")
708
	parser.add_argument("-A", "--author", default="unknown",
709
			help="Author of the post-processed data set "
710
			"(default: %(default)s)")
711
	parser.add_argument("-p", "--path", default=None,
712
			help="path containing the L2 data")
713
	parser.add_argument("-s", "--spectra", default=None, metavar="PATH",
714
			help="path containing the L1c spectra")
715
	parser.add_argument("-m", "--mlt", action="store_true", default=False,
716
			help="indicate nominal (False, default) or MLT data (True)")
717
	parser.add_argument("-X", "--xarray", action="store_true", default=False,
718
			help="use xarray to prepare the dataset"
719
			" (experimental, default %(default)s)")
720
	loglevels = parser.add_mutually_exclusive_group()
721
	loglevels.add_argument("-l", "--loglevel", default="WARNING",
722
			choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
723
			help="change the loglevel (default: 'WARNING')")
724
	loglevels.add_argument("-q", "--quiet", action="store_true", default=False,
725
			help="less output, same as --loglevel=ERROR (default: False)")
726
	loglevels.add_argument("-v", "--verbose", action="store_true", default=False,
727
			help="verbose output, same as --loglevel=INFO (default: False)")
728
	args = parser.parse_args()
729
	if args.quiet:
730
		logging.getLogger().setLevel(logging.ERROR)
731
	elif args.verbose:
732
		logging.getLogger().setLevel(logging.INFO)
733
	else:
734
		logging.getLogger().setLevel(args.loglevel)
735
736
	logging.info("processing L2 version: %s", args.retrieval_version)
737
	logging.info("writing data file version: %s", args.file_version)
738
739
	pddrange = []
740
	if args.month is not None:
741
		d0 = pd.to_datetime(args.month + "-01", utc=True)
742
		pddrange += pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd())
743
	if args.date_range is not None:
744
		pddrange += pd.date_range(*args.date_range.split(':'))
745
	if args.dates is not None:
746
		pddrange += pd.to_datetime(args.dates.split(','), utc=True)
747
	logging.debug("pddrange: %s", pddrange)
748
749
	olist = []
750
	for date in pddrange:
751
		try:
752
			olist += get_orbits_from_date(date.strftime("%Y-%m-%d"),
753
				mlt=args.mlt, path=args.path, L2_version=args.retrieval_version)
754
		except:  # handle NaT
755
			pass
756
	if args.orbit_file is not None:
757
		olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist()
758
	logging.debug("olist: %s", olist)
759
760
	if not olist:
761
		logging.warn("No orbits to process.")
762
		return
763
764
	sdlist, sdxr_ds = combine_orbit_data(olist,
765
			ref_date=args.base_date,
766
			L2_version=args.retrieval_version, file_version=args.file_version,
767
			dens_path=args.path, spec_base=args.spectra, use_xarray=args.xarray,
768
			save_nc=False)
769
	sdlist.author = args.author
770
771
	if args.xarray and sdxr_ds is not None:
772
		sdxr_ds.attrs["author"] = args.author
773
		sd_xr = sddata_xr_set_attrs(
774
			sdxr_ds, ref_date=args.base_date,
775
			rename=True, file_version=args.file_version,
776
		)
777
		sd_xr2 = sdlist.to_xarray()
778
		# Overwrite version-specific variable attributes
779
		for _v, _a in VAR_ATTRS[args.file_version].items():
780
			sd_xr2[_v].attrs = _a
781
		# version specific renaming
782
		sd_xr2 = sd_xr2.rename(VAR_RENAME[args.file_version])
783
		logging.debug(sd_xr)
784
		logging.debug(sd_xr2)
785
		logging.debug("equal datasets: %s", sd_xr.equals(sd_xr2))
786
		xr.testing.assert_allclose(sd_xr, sd_xr2)
787
		if sd_xr2 is not None:
788
			logging.debug("xarray dataset: %s", sd_xr2)
789
			sd_xr2.to_netcdf(args.file, unlimited_dims=["time"])
790
	else:
791
		if sdlist.no_dens is not None:
792
			sdlist.write_to_netcdf(args.file)
793
		else:
794
			logging.warn("Processed data is empty.")
795
796
797
if __name__ == "__main__":
798
	main()
799