Passed
Push — master ( 9d3b1e...fe933f )
by Stefan
03:01
created

scia_post_process_l2   C

Complexity

Total Complexity 54

Size/Duplication

Total Lines 630
Duplicated Lines 83.97 %

Importance

Changes 0
Metric Value
eloc 399
dl 529
loc 630
rs 6.4799
c 0
b 0
f 0
wmc 54

6 Functions

Rating   Name   Duplication   Size   Complexity  
F process_orbit() 194 194 15
A get_orbits_from_date() 32 32 3
A sddata_xr_set_attrs() 47 47 2
C combine_orbit_data() 65 67 9
C read_spectra() 101 101 9
F main() 90 94 14

2 Methods

Rating   Name   Duplication   Size   Complexity  
A _circ_interp.__init__() 0 3 1
A _circ_interp.__call__() 0 2 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complexity

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like scia_post_process_l2 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# vim: set fileencoding=utf-8
3
"""SCIAMACHY level 2 data post processing
4
5
Main script for SCIAMACHY orbital retrieval post processing
6
and data combining (to netcdf).
7
8
Copyright (c) 2018 Stefan Bender
9
10
This file is part of sciapy.
11
sciapy is free software: you can redistribute it or modify it
12
under the terms of the GNU General Public License as published by
13
the Free Software Foundation, version 2.
14
See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
15
"""
16
17
from __future__ import absolute_import, division, print_function
18
19
import glob
20
import os
21
import argparse as ap
22
import datetime as dt
23
import logging
24
25
import numpy as np
26
import pandas as pd
27
import xarray as xr
28
from scipy.interpolate import interp1d
29
try:
30
	import pysolar.solar as sol
31
	sun_alt_func = sol.get_altitude
32
except ImportError:  # pysolar 0.6 (for python 2)
33
	import Pysolar.solar as sol
34
	sun_alt_func = sol.GetAltitude
35
36
import sciapy.level1c as sl
37
from sciapy.level2 import density_pp as sd
38
from sciapy.level2 import scia_akm as sa
39
from sciapy.level2.igrf import gmag_igrf
40
from sciapy.level2.aacgm2005 import gmag_aacgm2005
41
try:
42
	from nrlmsise00 import msise_flat as msise
43
except ImportError:
44
	msise = None
45
try:
46
	from sciapy.level2.noem import noem_cpp
47
except ImportError:
48
	noem_cpp = None
49
50
this_dir = os.path.realpath(os.path.dirname(__file__))
51
f107_data = dict(np.genfromtxt(
52
	os.path.join(this_dir, "../data/indices/f107_noontime_flux_obs.txt"),
53
	usecols=[0, 2], dtype=None))
54
f107a_data = dict(np.genfromtxt(
55
	os.path.join(this_dir, "../data/indices/f107a_noontime_flux_obs.txt"),
56
	usecols=[0, 2], dtype=None))
57
ap_data = dict(np.genfromtxt(
58
	os.path.join(this_dir, "../data/indices/spidr_ap_2000-2012.dat"),
59
	usecols=[0, 2], dtype=None))
60
f107_adj = dict(np.genfromtxt(
61
	os.path.join(this_dir, "../data/indices/spidr_f107_2000-2012.dat"),
62
	usecols=[0, 2], dtype=None))
63
kp_data = dict(np.genfromtxt(
64
	os.path.join(this_dir, "../data/indices/spidr_kp_2000-2012.dat"),
65
	usecols=[0, 2], dtype=None))
66
67
phi_fac = 11.91
68
lst_fac = -0.62
69
70
71 View Code Duplication
def read_spectra(year, orbit, spec_base=None, skip_upleg=True):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
72
	"""Read and examine SCIAMACHY orbit spectra
73
74
	Reads the limb spactra and extracts the dates, times, latitudes,
75
	longitudes to be used to re-assess the retrieved geolocations.
76
77
	Parameters
78
	----------
79
	year: int
80
		The measurement year to select the corresponding subdir
81
		below `spec_base` (see below).
82
	orbit: int
83
		SCIAMACHY/Envisat orbit number of the spectra.
84
	spec_base: str, optional
85
		The root path to the level 1c spectra. Uses the current
86
		dir if not set or set to `None` (default).
87
	skip_upleg: bool, optional
88
		Skip upleg limb scans, i.e. night time scans. For NO retrievals,
89
		those are not used and should be not used here.
90
		Default: True
91
92
	Returns
93
	-------
94
	(dts, times, lats, lons, mlsts, alsts, eotcorr)
95
	"""
96
	fail = (None,) * 7
97
	if spec_base is None:
98
		spec_base = os.curdir
99
	spec_path = "{0}/{1}".format(spec_base.rstrip('/'), year)
100
	spec_path2 = "{0}/{1}".format(spec_base.rstrip('/'), int(year) + 1)
101
	logging.debug("spec_path: %s", spec_path)
102
	logging.debug("spec_path2: %s", spec_path)
103
	if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)):
104
		return fail
105
106
	# the star stands for the (optional) date subdir
107
	# to find all spectra for the orbit
108
	spfiles = glob.glob(
109
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
110
			.format(spec_path, orbit))
111
	# sometimes for whatever reason the orbit ends up in the wrong year subdir
112
	# looks in the subdir for the following year as well.
113
	spfiles += glob.glob(
114
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
115
			.format(spec_path2, orbit))
116
	spdict = dict([(fn, os.path.basename(fn).split('_')[2:4])
117
				for fn in spfiles])
118
	logging.debug("spdict: %s", spdict)
119
	if len(spfiles) < 2:
120
		return fail
121
122
	sorted_spdkeys = sorted(spdict.keys())
123
124
	slscans = [sl.scia_limb_scan() for _ in spdict]
125
	[s.read_from_file(f) for s, f in zip(slscans, sorted_spdkeys)]
126
127
	lsts = [(s.cent_lat_lon[:2],
128
			s.local_solar_time(False),
129
			s.limb_data.tp_lat)
130
			for s in slscans]
131
	lstdict = dict(zip(sorted_spdkeys, lsts))
132
	logging.debug("lstdict: %s", lstdict)
133
134
	dts = []
135
	times = []
136
	lats = []
137
	lons = []
138
	mlsts = []
139
	alsts = []
140
141
	for key in sorted_spdkeys:
142
		(lat, lon), (mlst, alst, eotcorr), tp_lats = lstdict[key]
143
		logging.debug("lat: %s, lon: %s", lat, lon)
144
		if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5):
145
			# Exclude non-downleg measurements where the latitude
146
			# of the last real tangent point (the last is dark sky)
147
			# is larger than or too close to the first latitude.
148
			# Requires an (empirical) separation of +0.5 degree.
149
			logging.debug("excluding upleg point at: %s, %s", lat, lon)
150
			continue
151
		dtdate = pd.to_datetime(''.join(spdict[key]),
152
				format="%Y%m%d%H%M%S", utc=True)
153
		time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0
154
		logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr)
155
		dts.append(dtdate)
156
		times.append(time_hour)
157
		lats.append(lat)
158
		lons.append(lon)
159
		mlsts.append(mlst)
160
		alsts.append(alst)
161
162
	if len(lats) < 2:
163
		# interpolation will fail with less than 2 points
164
		return fail
165
166
	return (np.asarray(dts),
167
			np.asarray(times),
168
			np.asarray(lats),
169
			np.asarray(lons),
170
			np.asarray(mlsts),
171
			np.asarray(alsts), eotcorr)
0 ignored issues
show
introduced by
The variable eotcorr does not seem to be defined in case the for loop on line 141 is not entered. Are you sure this can never be the case?
Loading history...
172
173
174
class _circ_interp(object):
175
	"""Interpolation on a circle"""
176
	def __init__(self, x, y, **kw):
177
		self.c_intpf = interp1d(x, np.cos(y), **kw)
178
		self.s_intpf = interp1d(x, np.sin(y), **kw)
179
180
	def __call__(self, x):
181
		return np.arctan2(self.s_intpf(x), self.c_intpf(x))
182
183
184 View Code Duplication
def process_orbit(
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
185
	orbit,
186
	ref_date="1950-01-01",
187
	dens_path=None,
188
	spec_base=None,
189
	use_msis=True,
190
):
191
	"""Post process retrieved SCIAMACHY orbit
192
193
	Parameters
194
	----------
195
	orbit: int
196
		SCIAMACHY/Envisat orbit number of the results to process.
197
	ref_date: str, optional
198
		Base date to calculate the relative days from,
199
		of the format "%Y-%m-%d". Default: 1950-01-01
200
	dens_path: str, optional
201
		The path to the level 2 data. Uses the current
202
		dir if not set or set to `None` (default).
203
	spec_base: str, optional
204
		The root path to the level 1c spectra. Uses the current
205
		dir if not set or set to `None` (default).
206
207
	Returns
208
	-------
209
	(dts0, time0, lst0, lon0, sdd): tuple
210
		dts0 - days since ref_date at equator crossing (float)
211
		time0 - utc hour into the day at equator crossing (float)
212
		lst0 - apparent local solar time at the equator (float)
213
		lon0 - longitude of the equator crossing (float)
214
		sdd - `scia_density_pp` instance of the post-processed data
215
	"""
216
	fail = (None,) * 5
217
	logging.debug("processing orbit: %s", orbit)
218
	dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True)
219
220
	dfiles = glob.glob(
221
			"{0}/000NO_orbit_{1:05d}_*_Dichten.txt"
222
			.format(dens_path, orbit))
223
	if len(dfiles) < 1:
224
		return fail
225
	logging.debug("dfiles: %s", dfiles)
226
	logging.debug("splits: %s", [fn.split('/') for fn in dfiles])
227
	ddict = dict([(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4]))
228
				for fn in dfiles])
229
	logging.debug("ddict: %s", ddict)
230
	year = ddict[sorted(ddict.keys())[0]][-1][:4]
231
	logging.debug("year: %s", year)
232
233
	dts, times, lats, lons, mlsts, alsts, eotcorr = \
234
		read_spectra(year, orbit, spec_base)
235
	if dts is None:
236
		# return early if reading the spectra failed
237
		return fail
238
239
	dts = pd.to_datetime(dts) - dtrefdate
240
	dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts])
241
	logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times)
242
243
	sdd = sd.scia_densities_pp(ref_date=ref_date)
244
	sdd.read_from_file(dfiles[0])
245
	logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons)
246
247
	# Re-interpolates the location (longitude) and times from the
248
	# limb scan spectra files along the orbit to determine the values
249
	# at the Equator and to fill in possibly missing data.
250
	#
251
	# y values are unit circle angles in radians (0 < φ < 2π or -π < φ < π)
252
	# longitudes
253
	lons_intpf = _circ_interp(
254
		lats[::-1], np.radians(lons[::-1]),
255
		fill_value="extrapolate",
256
	)
257
	# apparent local solar time (EoT corrected)
258
	lst_intpf = _circ_interp(
259
		lats[::-1], np.pi / 12. * alsts[::-1],
260
		fill_value="extrapolate",
261
	)
262
	# mean local solar time
263
	mst_intpf = _circ_interp(
264
		lats[::-1], np.pi / 12. * mlsts[::-1],
265
		fill_value="extrapolate",
266
	)
267
	# utc time (day)
268
	time_intpf = _circ_interp(
269
		lats[::-1], np.pi / 12. * times[::-1],
270
		fill_value="extrapolate",
271
	)
272
	# datetime
273
	dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate")
274
275
	# equator values
276
	lon0 = np.degrees(lons_intpf(0.)) % 360.
277
	lst0 = (lst_intpf(0.) * 12. / np.pi) % 24.
278
	mst0 = (mst_intpf(0.) * 12. / np.pi) % 24.
279
	time0 = (time_intpf(0.) * 12. / np.pi) % 24.
280
	dts_retr_interp0 = dts_retr_interpf(0.)
281
	logging.debug("utc day at equator: %s", dts_retr_interp0)
282
	logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0)
283
284
	sdd.utchour = (time_intpf(sdd.lats) * 12. / np.pi) % 24.
285
	sdd.utcdays = dts_retr_interpf(sdd.lats)
286
287
	if sdd.lons is None:
288
		# recalculate the longitudes
289
		# estimate the equatorial longitude from the
290
		# limb scan latitudes and longitudes
291
		lon0s_tp = lons - phi_fac * np.tan(np.radians(lats))
292
		clon0s_tp = np.cos(np.radians(lon0s_tp))
293
		slon0s_tp = np.sin(np.radians(lon0s_tp))
294
		lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1]))
295
		lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi))
296
		logging.info("lon0: %s", lon0)
297
		logging.info("lon0 tp: %s", lon0_tp)
298
		# interpolate to the retrieval latitudes
299
		tg_retr_lats = np.tan(np.radians(sdd.lats))
300
		calc_lons = (tg_retr_lats * phi_fac + lon0) % 360.
301
		calc_lons_tp = (tg_retr_lats * phi_fac + lon0_tp) % 360.
302
		sdd.lons = calc_lons_tp
303
		logging.debug("(calculated) retrieval lons: %s, %s",
304
				calc_lons, calc_lons_tp)
305
	else:
306
		# sdd.lons = sdd.lons % 360.
307
		logging.debug("(original) retrieval lons: %s", sdd.lons)
308
309
	sdd.mst = (sdd.utchour + sdd.lons / 15.) % 24.
310
	sdd.lst = sdd.mst + eotcorr / 60.
311
312
	dt_date_this = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
313
	logging.info("date: %s", dt_date_this)
314
	# caclulate geomagnetic coordinates
315
	sdd.gmlats, sdd.gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=100.)
316
	logging.debug("geomag. lats: %s, lons: %s", sdd.gmlats, sdd.gmlons)
317
	sdd.aacgmgmlats, sdd.aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons)
318
	logging.debug("aacgm geomag. lats: %s, lons: %s",
319
			sdd.aacgmgmlats, sdd.aacgmgmlons)
320
321
	# current day for MSIS input
322
	msis_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
323
	msis_dtdate1 = msis_dtdate - dt.timedelta(days=1)
324
	msis_date = msis_dtdate.strftime("%Y-%m-%d").encode()
325
	msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode()
326
	msis_f107 = f107_data[msis_date1]
327
	msis_f107a = f107a_data[msis_date]
328
	msis_ap = ap_data[msis_date]
329
	logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s",
330
			msis_date, msis_f107a, msis_f107, msis_ap)
331
332
	# previous day for NOEM input
333
	noem_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0) - 1) + dtrefdate
334
	noem_date = noem_dtdate.strftime("%Y-%m-%d").encode()
335
	noem_f107 = f107_adj[noem_date]
336
	noem_kp = kp_data[noem_date]
337
	logging.debug("NOEM date: %s, f10.7: %s, kp: %s",
338
			noem_date, noem_f107, noem_kp)
339
340
	if sdd.noem_no is None:
341
		sdd.noem_no = np.zeros_like(sdd.densities)
342
	if sdd.temperature is None and msise is None:
343
		sdd.temperature = np.full_like(sdd.densities, np.nan)
344
	if sdd.sza is None:
345
		sdd.sza = np.zeros_like(sdd.lats)
346
	if sdd.akdiag is None:
347
		sdd.akdiag = np.zeros_like(sdd.densities)
348
		akm_filename = glob.glob(
349
				"{0}/000NO_orbit_{1:05d}_*_AKM*"
350
				.format(dens_path, orbit))[0]
351
		logging.debug("ak file: %s", akm_filename)
352
		ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat)
353
		logging.debug("ak data: %s", ak)
354
		sdd.akdiag = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1)
355
356
	if msise is not None:
357
		if sdd.temperature is None or use_msis:
358
			_msis_d_t = msise(
359
				msis_dtdate,
360
				sdd.alts[None, :], sdd.lats[:, None], sdd.lons[:, None] % 360.,
361
				msis_f107a, msis_f107, msis_ap,
362
				lst=sdd.lst[:, None],
363
			)
364
			sdd.temperature = _msis_d_t[:, :, -1]
365
		if use_msis:
366
			sdd.dens_tot = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2)
0 ignored issues
show
introduced by
The variable _msis_d_t does not seem to be defined in case sdd.temperature is None or use_msis on line 357 is False. Are you sure this can never be the case?
Loading history...
367
	for i, lat in enumerate(sdd.lats):
368
		if noem_cpp is not None:
369
			sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts,
370
					[lat], [sdd.lons[i]], noem_f107, noem_kp)[:]
371
		else:
372
			sdd.noem_no[i][:] = np.nan
373
		sdd.sza[i] = 90. - sun_alt_func(lat, sdd.lons[i],
374
				dt.timedelta(np.asscalar(sdd.utcdays[i])) + dtrefdate,
375
				elevation=sdd.alts.mean() * 1000.)
376
	sdd.vmr = sdd.densities / sdd.dens_tot * 1.e9  # ppb
377
	return dts_retr_interp0, time0, lst0, lon0, sdd
378
379
380 View Code Duplication
def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
381
	"""Find SCIAMACHY orbits with retrieved data at a date
382
383
	Parameters
384
	----------
385
	date: str
386
		The date in the format "%Y-%m-%d".
387
	mlt: bool, optional
388
		Look for MLT mode data instead of nominal mode data.
389
		Increases the heuristics to find all MLT orbits.
390
	path: str, optional
391
		The path to the level 2 data. If `None` tries to infer
392
		the data directory from the L2 version using
393
		'./*<L2_version>'. Default: None
394
395
	Returns
396
	-------
397
	orbits: list
398
		List of found orbits with retrieved data files
399
	"""
400
	logging.debug("pre-processing: %s", date)
401
	if path is None:
402
		density_base = os.curdir
403
		path = "{0}/*{1}".format(density_base, L2_version)
404
		logging.debug("path: %s", path)
405
406
	dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format(
407
				path, date.replace("-", "")))
408
	orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles])
409
	if mlt:
410
		orbits.append(orbits[-1] + 1)
411
	return orbits
412
413
414 View Code Duplication
def combine_orbit_data(orbits,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
415
		ref_date="1950-01-01",
416
		L2_version="v6.2", file_version="2.3",
417
		dens_path=None, spec_base=None,
418
		use_xarray=False, save_nc=False):
419
	"""Combine post-processed SCIAMACHY retrieved orbit data
420
421
	Parameters
422
	----------
423
	orbits: list
424
		List of SCIAMACHY/Envisat orbit numbers to process.
425
	ref_date: str, optional
426
		Base date to calculate the relative days from,
427
		of the format "%Y-%m-%d". Default: 1950-01-01
428
	L2_version: str, optional
429
		SCIAMACHY level 2 data version to process
430
	file_version: str, optional
431
		Postprocessing format version of the output data
432
	dens_path: str, optional
433
		The path to the level 2 data. If `None` tries to infer
434
		the data directory from the L2 version looking for anything
435
		in the current directory that ends in <L2_version>: './*<L2_version>'.
436
		Default: None
437
	spec_base: str, optional
438
		The root path to the level 1c spectra. Uses the current
439
		dir if not set or set to `None` (default).
440
	use_xarray: bool, optional
441
		Uses xarray (if available) to combine the orbital data.
442
	save_nc: bool, optional
443
		Save the intermediate orbit data sets to netcdf files
444
		for debugging.
445
446
	Returns
447
	-------
448
	(sdday, sdday_ds): tuple
449
		`sdday` contains the combined data as a `scia_density_day` instance,
450
		`sdday_ds` contains the same data as a `xarray.Dataset`.
451
	"""
452
	if dens_path is None:
453
		# try some heuristics
454
		density_base = os.curdir
455
		dens_path = "{0}/*{1}".format(density_base, L2_version)
456
457
	sdday = sd.scia_density_day(ref_date=ref_date)
458
	sddayl = []
459
	sdday_ds = None
460
	for orbit in sorted(orbits):
461
		dateo, timeo, lsto, lono, sdens = process_orbit(orbit,
462
				ref_date=ref_date, dens_path=dens_path, spec_base=spec_base)
463
		logging.info(
464
			"orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s",
465
			orbit, dateo, timeo, lsto, lono
466
		)
467
		if sdens is not None:
468
			sdens.version = file_version
469
			sdens.data_version = L2_version
470
			sdday.append_data(dateo, orbit, timeo, sdens)
471
			if use_xarray:
472
				sd_xr = sdens.to_xarray(dateo, orbit)
473
				if sd_xr is not None:
474
					logging.debug("orbit %s dataset: %s", orbit, sd_xr)
475
					sddayl.append(sd_xr)
476
			if save_nc:
477
				sdens.write_to_netcdf(sdens.filename[:-3] + "nc")
478
	if use_xarray and sddayl:
479
		sdday_ds = xr.concat(sddayl, dim="time")
480
	return sdday, sdday_ds
481
482
483 View Code Duplication
def sddata_xr_set_attrs(sdday_xr, ref_date="1950-01-01", rename=True):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
484
	"""Customize xarray Dataset variables and attributes
485
486
	Changes the variable names to match those exported from the
487
	`scia_density_day` class.
488
489
	Parameters
490
	----------
491
	sdday_xr: xarray.Dataset instance
492
	ref_date: str, optional
493
		Base date to calculate the relative days from,
494
		of the format "%Y-%m-%d". Default: 1950-01-01
495
	rename: bool, optional
496
		Rename the dataset variables to match the
497
		`scia_density_day` exported ones.
498
	"""
499
	if rename:
500
		sdday_xr = sdday_xr.rename({
501
			"density": "NO_DENS", "density_air": "TOT_DENS",
502
			"apriori": "NO_APRIORI", "error_meas": "NO_ERR",
503
			"error_tot": "NO_ETOT",
504
			"NOEM_density": "NO_NOEM", "akm_diagonal": "NO_AKDIAG",
505
			"VMR": "NO_VMR",
506
			"utc_hour": "UTC", "mean_sza": "mean_SZA",
507
			"app_lst": "app_LST", "mean_lst": "mean_LST",
508
		})
509
	sdday_xr["NO_RSTD"] = 100 * np.abs(sdday_xr.NO_ERR / sdday_xr.NO_DENS)
510
	sdday_xr["NO_RSTD"].attrs = dict(units='%',
511
		long_name='NO relative standard deviation')
512
	# fix coordinate attributes
513
	sdday_xr["time"].attrs = dict(axis='T', standard_name='time',
514
		calendar='standard', long_name='equatorial crossing time',
515
		units="days since {0}".format(
516
			pd.to_datetime(ref_date).isoformat(sep=" ")))
517
	sdday_xr["orbit"].attrs = dict(axis='T', calendar='standard',
518
		long_name='SCIAMACHY/Envisat orbit number', units='1')
519
	sdday_xr["altitude"].attrs = dict(axis='Z', positive='up',
520
		long_name='altitude', standard_name='altitude', units='km')
521
	sdday_xr["latitude"].attrs = dict(axis='Y', long_name='latitude',
522
		standard_name='latitude', units='degrees_north')
523
	sdday_xr["longitude"].attrs = dict(long_name='longitude',
524
		standard_name='longitude', units='degrees_east')
525
	dateo = (pd.to_datetime(
526
			xr.conventions.decode_cf_variable("date", sdday_xr.time).data[0])
527
				.strftime("%Y-%m-%d"))
528
	logging.debug("date %s dataset: %s", dateo, sdday_xr)
529
	return sdday_xr
530
531
532 View Code Duplication
def main():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
533
	logging.basicConfig(level=logging.WARNING,
534
			format="[%(levelname)-8s] (%(asctime)s) "
535
			"%(filename)s:%(lineno)d %(message)s",
536
			datefmt="%Y-%m-%d %H:%M:%S %z")
537
538
	parser = ap.ArgumentParser()
539
	parser.add_argument("file", default="SCIA_NO.nc",
540
			help="the filename of the output netcdf file")
541
	parser.add_argument("-M", "--month", metavar="YEAR-MM",
542
			help="infer start and end dates for month")
543
	parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE",
544
			help="colon-separated start and end dates")
545
	parser.add_argument("-d", "--dates", help="comma-separated list of dates")
546
	parser.add_argument("-f", "--orbit_file",
547
			help="the file containing the input orbits")
548
	parser.add_argument("-p", "--path", default=None,
549
			help="path containing the L2 data")
550
	parser.add_argument("-r", "--retrieval_version", default="v6.2",
551
			help="SCIAMACHY level 2 data version to process")
552
	parser.add_argument("-R", "--file_version", default="2.3",
553
			help="Postprocessing format version of the output file")
554
	parser.add_argument("-s", "--spectra", default=None,
555
			help="path containing the L1c spectra")
556
	parser.add_argument("-m", "--mlt", action="store_true", default=False,
557
			help="indicate nominal (False, default) or MLT data (True)")
558
	parser.add_argument("-X", "--xarray", action="store_true", default=False,
559
			help="use xarray to prepare the dataset"
560
			" (experimental, default %(default)s)")
561
	loglevels = parser.add_mutually_exclusive_group()
562
	loglevels.add_argument("-l", "--loglevel", default="WARNING",
563
			choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
564
			help="change the loglevel (default: 'WARNING')")
565
	loglevels.add_argument("-q", "--quiet", action="store_true", default=False,
566
			help="less output, same as --loglevel=ERROR (default: False)")
567
	loglevels.add_argument("-v", "--verbose", action="store_true", default=False,
568
			help="verbose output, same as --loglevel=INFO (default: False)")
569
	args = parser.parse_args()
570
	if args.quiet:
571
		logging.getLogger().setLevel(logging.ERROR)
572
	elif args.verbose:
573
		logging.getLogger().setLevel(logging.INFO)
574
	else:
575
		logging.getLogger().setLevel(args.loglevel)
576
577
	logging.info("processing L2 version: %s", args.retrieval_version)
578
	logging.info("writing data file version: %s", args.file_version)
579
580
	pddrange = []
581
	if args.month is not None:
582
		d0 = pd.to_datetime(args.month + "-01")
583
		pddrange += pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd())
584
	if args.date_range is not None:
585
		pddrange += pd.date_range(*args.date_range.split(':'))
586
	if args.dates is not None:
587
		pddrange += pd.to_datetime(args.dates.split(','))
588
	logging.debug("pddrange: %s", pddrange)
589
590
	olist = []
591
	for date in pddrange:
592
		try:
593
			olist += get_orbits_from_date(date.strftime("%Y-%m-%d"),
594
				mlt=args.mlt, path=args.path, L2_version=args.retrieval_version)
595
		except:  # handle NaT
596
			pass
597
	if args.orbit_file is not None:
598
		olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist()
599
	logging.debug("olist: %s", olist)
600
601
	if not olist:
602
		logging.warn("No orbits to process.")
603
		return
604
605
	sdlist, sdxr_ds = combine_orbit_data(olist,
606
			ref_date="2000-01-01",
607
			L2_version=args.retrieval_version, file_version=args.file_version,
608
			dens_path=args.path, spec_base=args.spectra, use_xarray=args.xarray,
609
			save_nc=False)
610
611
	if args.xarray and sdxr_ds is not None:
612
		sd_xr = sddata_xr_set_attrs(sdxr_ds, ref_date="2000-01-01")
613
		sd_xr2 = sdlist.to_xarray()
614
		logging.debug(sd_xr)
615
		logging.debug(sd_xr2)
616
		logging.debug("equal datasets: %s", sd_xr.equals(sd_xr2))
617
		xr.testing.assert_allclose(sd_xr, sd_xr2)
618
		if sd_xr2 is not None:
619
			logging.debug("xarray dataset: %s", sd_xr2)
620
			sd_xr2.to_netcdf(args.file, unlimited_dims=["time"])
621
	else:
622
		if sdlist.no_dens is not None:
623
			sdlist.write_to_netcdf(args.file)
624
		else:
625
			logging.warn("Processed data is empty.")
626
627
628
if __name__ == "__main__":
629
	main()
630