Passed
Push — master ( 95effc...8c9298 )
by Stefan
06:22
created

scia_post_process_l2   B

Complexity

Total Complexity 47

Size/Duplication

Total Lines 586
Duplicated Lines 86.86 %

Importance

Changes 0
Metric Value
eloc 377
dl 509
loc 586
rs 8.64
c 0
b 0
f 0
wmc 47

6 Functions

Rating   Name   Duplication   Size   Complexity  
C read_spectra() 101 101 9
D process_orbit() 174 174 10
A get_orbits_from_date() 32 32 3
A sddata_xr_set_attrs() 47 47 2
C combine_orbit_data() 65 65 9
F main() 90 90 14

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complexity

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like scia_post_process_l2 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# vim: set fileencoding=utf-8
3
"""SCIAMACHY level 2 data post processing
4
5
Main script for SCIAMACHY orbital retrieval post processing
6
and data combining (to netcdf).
7
8
Copyright (c) 2018 Stefan Bender
9
10
This file is part of sciapy.
11
sciapy is free software: you can redistribute it or modify it
12
under the terms of the GNU General Public License as published by
13
the Free Software Foundation, version 2.
14
See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
15
"""
16
17
from __future__ import absolute_import, division, print_function
18
19
import glob
20
import os
21
import argparse as ap
22
import datetime as dt
23
import logging
24
25
import numpy as np
26
import pandas as pd
27
import xarray as xr
28
from scipy.interpolate import interp1d
29
try:
30
	import pysolar.solar as sol
31
	sun_alt_func = sol.get_altitude
32
except ImportError:  # pysolar 0.6 (for python 2)
33
	import Pysolar.solar as sol
34
	sun_alt_func = sol.GetAltitude
35
36
import sciapy.level1c as sl
37
from sciapy.level2 import density_pp as sd
38
from sciapy.level2 import scia_akm as sa
39
from sciapy.level2.igrf import gmag_igrf
40
from sciapy.level2.aacgm2005 import gmag_aacgm2005
41
from nrlmsise00 import msise_flat as msise
42
try:
43
	from sciapy.level2.noem import noem_cpp
44
except ImportError:
45
	noem_cpp = None
46
47
this_dir = os.path.realpath(os.path.dirname(__file__))
48
f107_data = dict(np.genfromtxt(
49
	os.path.join(this_dir, "../data/indices/f107_noontime_flux_obs.txt"),
50
	usecols=[0, 2], dtype=None))
51
f107a_data = dict(np.genfromtxt(
52
	os.path.join(this_dir, "../data/indices/f107a_noontime_flux_obs.txt"),
53
	usecols=[0, 2], dtype=None))
54
ap_data = dict(np.genfromtxt(
55
	os.path.join(this_dir, "../data/indices/spidr_ap_2000-2012.dat"),
56
	usecols=[0, 2], dtype=None))
57
f107_adj = dict(np.genfromtxt(
58
	os.path.join(this_dir, "../data/indices/spidr_f107_2000-2012.dat"),
59
	usecols=[0, 2], dtype=None))
60
kp_data = dict(np.genfromtxt(
61
	os.path.join(this_dir, "../data/indices/spidr_kp_2000-2012.dat"),
62
	usecols=[0, 2], dtype=None))
63
64
phi_fac = 11.91
65
lst_fac = -0.62
66
67
68 View Code Duplication
def read_spectra(year, orbit, spec_base=None, skip_upleg=True):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
69
	"""Read and examine SCIAMACHY orbit spectra
70
71
	Reads the limb spactra and extracts the dates, times, latitudes,
72
	longitudes to be used to re-assess the retrieved geolocations.
73
74
	Parameters
75
	----------
76
	year: int
77
		The measurement year to select the corresponding subdir
78
		below `spec_base` (see below).
79
	orbit: int
80
		SCIAMACHY/Envisat orbit number of the spectra.
81
	spec_base: str, optional
82
		The root path to the level 1c spectra. Uses the current
83
		dir if not set or set to `None` (default).
84
	skip_upleg: bool, optional
85
		Skip upleg limb scans, i.e. night time scans. For NO retrievals,
86
		those are not used and should be not used here.
87
		Default: True
88
89
	Returns
90
	-------
91
	(dts, times, lats, lons, mlsts, alsts, eotcorr)
92
	"""
93
	fail = (None,) * 7
94
	if spec_base is None:
95
		spec_base = os.curdir
96
	spec_path = "{0}/{1}".format(spec_base.rstrip('/'), year)
97
	spec_path2 = "{0}/{1}".format(spec_base.rstrip('/'), int(year) + 1)
98
	logging.debug("spec_path: %s", spec_path)
99
	logging.debug("spec_path2: %s", spec_path)
100
	if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)):
101
		return fail
102
103
	# the star stands for the (optional) date subdir
104
	# to find all spectra for the orbit
105
	spfiles = glob.glob(
106
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
107
			.format(spec_path, orbit))
108
	# sometimes for whatever reason the orbit ends up in the wrong year subdir
109
	# looks in the subdir for the following year as well.
110
	spfiles += glob.glob(
111
			'{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
112
			.format(spec_path2, orbit))
113
	spdict = dict([(fn, os.path.basename(fn).split('_')[2:4])
114
				for fn in spfiles])
115
	logging.debug("spdict: %s", spdict)
116
	if len(spfiles) < 2:
117
		return fail
118
119
	sorted_spdkeys = sorted(spdict.keys())
120
121
	slscans = [sl.scia_limb_scan() for _ in spdict]
122
	[s.read_from_file(f) for s, f in zip(slscans, sorted_spdkeys)]
123
124
	lsts = [(s.cent_lat_lon[:2],
125
			s.local_solar_time(False),
126
			s.limb_data.tp_lat)
127
			for s in slscans]
128
	lstdict = dict(zip(sorted_spdkeys, lsts))
129
	logging.debug("lstdict: %s", lstdict)
130
131
	dts = []
132
	times = []
133
	lats = []
134
	lons = []
135
	mlsts = []
136
	alsts = []
137
138
	for key in sorted_spdkeys:
139
		(lat, lon), (mlst, alst, eotcorr), tp_lats = lstdict[key]
140
		logging.debug("lat: %s, lon: %s", lat, lon)
141
		if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5):
142
			# Exclude non-downleg measurements where the latitude
143
			# of the last real tangent point (the last is dark sky)
144
			# is larger than or too close to the first latitude.
145
			# Requires an (empirical) separation of +0.5 degree.
146
			logging.debug("excluding upleg point at: %s, %s", lat, lon)
147
			continue
148
		dtdate = pd.to_datetime(''.join(spdict[key]),
149
				format="%Y%m%d%H%M%S", utc=True)
150
		time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0
151
		logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr)
152
		dts.append(dtdate)
153
		times.append(time_hour)
154
		lats.append(lat)
155
		lons.append(lon)
156
		mlsts.append(mlst)
157
		alsts.append(alst)
158
159
	if len(lats) < 2:
160
		# interpolation will fail with less than 2 points
161
		return fail
162
163
	return (np.asarray(dts),
164
			np.asarray(times),
165
			np.asarray(lats),
166
			np.asarray(lons),
167
			np.asarray(mlsts),
168
			np.asarray(alsts), eotcorr)
0 ignored issues
show
introduced by
The variable eotcorr does not seem to be defined in case the for loop on line 138 is not entered. Are you sure this can never be the case?
Loading history...
169
170 View Code Duplication
def process_orbit(orbit,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
171
		ref_date="1950-01-01",
172
		dens_path=None, spec_base=None):
173
	"""Post process retrieved SCIAMACHY orbit
174
175
	Parameters
176
	----------
177
	orbit: int
178
		SCIAMACHY/Envisat orbit number of the results to process.
179
	ref_date: str, optional
180
		Base date to calculate the relative days from,
181
		of the format "%Y-%m-%d". Default: 1950-01-01
182
	dens_path: str, optional
183
		The path to the level 2 data. Uses the current
184
		dir if not set or set to `None` (default).
185
	spec_base: str, optional
186
		The root path to the level 1c spectra. Uses the current
187
		dir if not set or set to `None` (default).
188
189
	Returns
190
	-------
191
	(dts0, time0, lst0, lon0, sdd): tuple
192
		dts0 - days since ref_date at equator crossing (float)
193
		time0 - utc hour into the day at equator crossing (float)
194
		lst0 - apparent local solar time at the equator (float)
195
		lon0 - longitude of the equator crossing (float)
196
		sdd - `scia_density_pp` instance of the post-processed data
197
	"""
198
	fail = (None,) * 5
199
	logging.debug("processing orbit: %s", orbit)
200
	dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True)
201
202
	dfiles = glob.glob(
203
			"{0}/000NO_orbit_{1:05d}_*_Dichten.txt"
204
			.format(dens_path, orbit))
205
	if len(dfiles) < 1:
206
		return fail
207
	logging.debug("dfiles: %s", dfiles)
208
	logging.debug("splits: %s", [fn.split('/') for fn in dfiles])
209
	ddict = dict([(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4]))
210
				for fn in dfiles])
211
	logging.debug("ddict: %s", ddict)
212
	year = ddict[sorted(ddict.keys())[0]][-1][:4]
213
	logging.debug("year: %s", year)
214
215
	dts, times, lats, lons, mlsts, alsts, eotcorr = \
216
			read_spectra(year, orbit, spec_base)
217
	if dts is None:
218
		# return early if reading the spectra failed
219
		return fail
220
221
	dts = pd.to_datetime(dts) - dtrefdate
222
	dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts])
223
	logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times)
224
225
	sdd = sd.scia_densities_pp(ref_date=ref_date)
226
	sdd.read_from_file(dfiles[0])
227
	logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons)
228
229
	# longitudes
230
	clons_retr_interpf = interp1d(lats[::-1], np.cos(np.radians(lons[::-1])), fill_value="extrapolate")
231
	slons_retr_interpf = interp1d(lats[::-1], np.sin(np.radians(lons[::-1])), fill_value="extrapolate")
232
	# apparent local solar time (EoT corrected)
233
	clst_retr_interpf = interp1d(lats[::-1], np.cos(np.pi / 12. * alsts[::-1]), fill_value="extrapolate")
234
	slst_retr_interpf = interp1d(lats[::-1], np.sin(np.pi / 12. * alsts[::-1]), fill_value="extrapolate")
235
	# mean local solar time
236
	cmst_retr_interpf = interp1d(lats[::-1], np.cos(np.pi / 12. * mlsts[::-1]), fill_value="extrapolate")
237
	smst_retr_interpf = interp1d(lats[::-1], np.sin(np.pi / 12. * mlsts[::-1]), fill_value="extrapolate")
238
	# utc time (day)
239
	ctime_retr_interpf = interp1d(lats[::-1], np.cos(np.pi / 12. * times[::-1]), fill_value="extrapolate")
240
	stime_retr_interpf = interp1d(lats[::-1], np.sin(np.pi / 12. * times[::-1]), fill_value="extrapolate")
241
	dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate")
242
243
	# equator values
244
	lon0 = np.degrees(np.arctan2(slons_retr_interpf(0.), clons_retr_interpf(0.)) % (2. * np.pi))
245
	lst0 = (np.arctan2(slst_retr_interpf(0.), clst_retr_interpf(0.)) % (2. * np.pi)) * 12. / np.pi
246
	mst0 = (np.arctan2(smst_retr_interpf(0.), cmst_retr_interpf(0.)) % (2. * np.pi)) * 12. / np.pi
247
	time0 = (np.arctan2(stime_retr_interpf(0.), ctime_retr_interpf(0.)) % (2. * np.pi)) * 12. / np.pi
248
	dts_retr_interp0 = dts_retr_interpf(0.)
249
	logging.debug("utc day at equator: %s", dts_retr_interp0)
250
	logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0)
251
252
	sdd.utchour = (np.arctan2(stime_retr_interpf(sdd.lats),
253
						ctime_retr_interpf(sdd.lats)) % (2. * np.pi)) * 12. / np.pi
254
	sdd.utcdays = dts_retr_interpf(sdd.lats)
255
256
	if sdd.lons is None:
257
		# recalculate the longitudes
258
		# estimate the equatorial longitude from the
259
		# limb scan latitudes and longitudes
260
		lon0s_tp = lons - phi_fac * np.tan(np.radians(lats))
261
		clon0s_tp = np.cos(np.radians(lon0s_tp))
262
		slon0s_tp = np.sin(np.radians(lon0s_tp))
263
		lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1]))
264
		lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi))
265
		logging.info("lon0: %s", lon0)
266
		logging.info("lon0 tp: %s", lon0_tp)
267
		# interpolate to the retrieval latitudes
268
		tg_retr_lats = np.tan(np.radians(sdd.lats))
269
		calc_lons = (tg_retr_lats * phi_fac + lon0) % 360.
270
		calc_lons_tp = (tg_retr_lats * phi_fac + lon0_tp) % 360.
271
		sdd.lons = calc_lons_tp
272
		logging.debug("(calculated) retrieval lons: %s, %s",
273
				calc_lons, calc_lons_tp)
274
	else:
275
		#sdd.lons = sdd.lons % 360.
276
		logging.debug("(original) retrieval lons: %s", sdd.lons)
277
278
	sdd.mst = (sdd.utchour + sdd.lons / 15.) % 24.
279
	sdd.lst = sdd.mst + eotcorr / 60.
280
281
	dt_date_this = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
282
	logging.info("date: %s", dt_date_this)
283
	# caclulate geomagnetic coordinates
284
	sdd.gmlats, sdd.gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=100.)
285
	logging.debug("geomag. lats: %s, lons: %s", sdd.gmlats, sdd.gmlons)
286
	sdd.aacgmgmlats, sdd.aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons)
287
	logging.debug("aacgm geomag. lats: %s, lons: %s",
288
			sdd.aacgmgmlats, sdd.aacgmgmlons)
289
290
	# current day for MSIS input
291
	msis_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate
292
	msis_dtdate1 = msis_dtdate - dt.timedelta(days=1)
293
	msis_date = msis_dtdate.strftime("%Y-%m-%d").encode()
294
	msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode()
295
	msis_f107 = f107_data[msis_date1]
296
	msis_f107a = f107a_data[msis_date]
297
	msis_ap = ap_data[msis_date]
298
	logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s",
299
			msis_date, msis_f107a, msis_f107, msis_ap)
300
301
	# previous day for NOEM input
302
	noem_date = (dt.timedelta(np.asscalar(dts_retr_interp0) - 1) +
303
					dtrefdate).strftime("%Y-%m-%d").encode()
304
	noem_f107 = f107_adj[noem_date]
305
	noem_kp = kp_data[noem_date]
306
	logging.debug("NOEM date: %s, f10.7: %s, kp: %s",
307
			noem_date, noem_f107, noem_kp)
308
309
	if sdd.noem_no is None:
310
		sdd.noem_no = np.zeros_like(sdd.densities)
311
	if sdd.temperature is None:
312
		sdd.temperature = np.zeros_like(sdd.densities)
313
	if sdd.sza is None:
314
		sdd.sza = np.zeros_like(sdd.lats)
315
	if sdd.akdiag is None:
316
		sdd.akdiag = np.zeros_like(sdd.densities)
317
		akm_filename = glob.glob(
318
				"{0}/000NO_orbit_{1:05d}_*_AKM*"
319
				.format(dens_path, orbit))[0]
320
		logging.debug("ak file: %s", akm_filename)
321
		ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat)
322
		logging.debug("ak data: %s", ak)
323
		sdd.akdiag = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1)
324
325
	_msis_d_t = msise(
326
		msis_dtdate,
327
		sdd.alts[None, :], sdd.lats[:, None], sdd.lons[:, None] % 360.,
328
		msis_f107a, msis_f107, msis_ap,
329
		lst=sdd.lst[:, None],
330
	)
331
	sdd.dens_tot = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2)
332
	sdd.temperature = _msis_d_t[:, :, -1]
333
	for i, lat in enumerate(sdd.lats):
334
		if noem_cpp is not None:
335
			sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts,
336
					[lat], [sdd.lons[i]], noem_f107, noem_kp)[:]
337
		else:
338
			sdd.noem_no[i][:] = np.nan
339
		sdd.sza[i] = 90. - sun_alt_func(lat, sdd.lons[i],
340
				dt.timedelta(np.asscalar(sdd.utcdays[i])) + dtrefdate,
341
				elevation=sdd.alts.mean() * 1000.)
342
	sdd.vmr = sdd.densities / sdd.dens_tot * 1.e9  # ppb
343
	return dts_retr_interp0, time0, lst0, lon0, sdd
344
345 View Code Duplication
def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
346
	"""Find SCIAMACHY orbits with retrieved data at a date
347
348
	Parameters
349
	----------
350
	date: str
351
		The date in the format "%Y-%m-%d".
352
	mlt: bool, optional
353
		Look for MLT mode data instead of nominal mode data.
354
		Increases the heuristics to find all MLT orbits.
355
	path: str, optional
356
		The path to the level 2 data. If `None` tries to infer
357
		the data directory from the L2 version using
358
		'./*<L2_version>'. Default: None
359
360
	Returns
361
	-------
362
	orbits: list
363
		List of found orbits with retrieved data files
364
	"""
365
	logging.debug("pre-processing: %s", date)
366
	if path is None:
367
		density_base = os.curdir
368
		path = "{0}/*{1}".format(density_base, L2_version)
369
		logging.debug("path: %s", path)
370
371
	dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format(
372
				path, date.replace("-", "")))
373
	orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles])
374
	if mlt:
375
		orbits.append(orbits[-1] + 1)
376
	return orbits
377
378 View Code Duplication
def combine_orbit_data(orbits,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
379
		ref_date="1950-01-01",
380
		L2_version="v6.2", file_version="2.3",
381
		dens_path=None, spec_base=None,
382
		use_xarray=False, save_nc=False):
383
	"""Combine post-processed SCIAMACHY retrieved orbit data
384
385
	Parameters
386
	----------
387
	orbits: list
388
		List of SCIAMACHY/Envisat orbit numbers to process.
389
	ref_date: str, optional
390
		Base date to calculate the relative days from,
391
		of the format "%Y-%m-%d". Default: 1950-01-01
392
	L2_version: str, optional
393
		SCIAMACHY level 2 data version to process
394
	file_version: str, optional
395
		Postprocessing format version of the output data
396
	dens_path: str, optional
397
		The path to the level 2 data. If `None` tries to infer
398
		the data directory from the L2 version looking for anything
399
		in the current directory that ends in <L2_version>: './*<L2_version>'.
400
		Default: None
401
	spec_base: str, optional
402
		The root path to the level 1c spectra. Uses the current
403
		dir if not set or set to `None` (default).
404
	use_xarray: bool, optional
405
		Uses xarray (if available) to combine the orbital data.
406
	save_nc: bool, optional
407
		Save the intermediate orbit data sets to netcdf files
408
		for debugging.
409
410
	Returns
411
	-------
412
	(sdday, sdday_ds): tuple
413
		`sdday` contains the combined data as a `scia_density_day` instance,
414
		`sdday_ds` contains the same data as a `xarray.Dataset`.
415
	"""
416
	if dens_path is None:
417
		# try some heuristics
418
		density_base = os.curdir
419
		dens_path = "{0}/*{1}".format(density_base, L2_version)
420
421
	sdday = sd.scia_density_day(ref_date=ref_date)
422
	sddayl = []
423
	sdday_ds = None
424
	for orbit in sorted(orbits):
425
		dateo, timeo, lsto, lono, sdens = process_orbit(orbit,
426
				ref_date=ref_date, dens_path=dens_path, spec_base=spec_base)
427
		logging.info("orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s",
428
				orbit, dateo, timeo, lsto, lono)
429
		if sdens is not None:
430
			sdens.version = file_version
431
			sdens.data_version = L2_version
432
			sdday.append_data(dateo, orbit, timeo, sdens)
433
			if use_xarray:
434
				sd_xr = sdens.to_xarray(dateo, orbit)
435
				if sd_xr is not None:
436
					logging.debug("orbit %s dataset: %s", orbit, sd_xr)
437
					sddayl.append(sd_xr)
438
			if save_nc:
439
				sdens.write_to_netcdf(sdens.filename[:-3] + "nc")
440
	if use_xarray and sddayl:
441
		sdday_ds = xr.concat(sddayl, dim="time")
442
	return sdday, sdday_ds
443
444 View Code Duplication
def sddata_xr_set_attrs(sdday_xr, ref_date="1950-01-01", rename=True):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
445
	"""Customize xarray Dataset variables and attributes
446
447
	Changes the variable names to match those exported from the
448
	`scia_density_day` class.
449
450
	Parameters
451
	----------
452
	sdday_xr: xarray.Dataset instance
453
	ref_date: str, optional
454
		Base date to calculate the relative days from,
455
		of the format "%Y-%m-%d". Default: 1950-01-01
456
	rename: bool, optional
457
		Rename the dataset variables to match the
458
		`scia_density_day` exported ones.
459
	"""
460
	if rename:
461
		sdday_xr = sdday_xr.rename({
462
			"density": "NO_DENS", "density_air": "TOT_DENS",
463
			"apriori": "NO_APRIORI", "error_meas": "NO_ERR",
464
			"error_tot": "NO_ETOT",
465
			"NOEM_density": "NO_NOEM", "akm_diagonal": "NO_AKDIAG",
466
			"VMR": "NO_VMR", "temperature": "T_MSIS",
467
			"utc_hour": "UTC", "mean_sza": "mean_SZA",
468
			"app_lst": "app_LST", "mean_lst": "mean_LST",
469
		})
470
	sdday_xr["NO_RSTD"] = 100 * np.abs(sdday_xr.NO_ERR / sdday_xr.NO_DENS)
471
	sdday_xr["NO_RSTD"].attrs = dict(units='%',
472
		long_name='NO relative standard deviation')
473
	# fix coordinate attributes
474
	sdday_xr["time"].attrs = dict(axis='T', standard_name='time',
475
		calendar='standard', long_name='equatorial crossing time',
476
		units="days since {0}".format(
477
			pd.to_datetime(ref_date).isoformat(sep=" ")))
478
	sdday_xr["orbit"].attrs = dict(axis='T', calendar='standard',
479
		long_name='SCIAMACHY/Envisat orbit number', units='1')
480
	sdday_xr["altitude"].attrs = dict(axis='Z', positive='up',
481
		long_name='altitude', standard_name='altitude', units='km')
482
	sdday_xr["latitude"].attrs = dict(axis='Y', long_name='latitude',
483
		standard_name='latitude', units='degrees_north')
484
	sdday_xr["longitude"].attrs = dict(long_name='longitude',
485
		standard_name='longitude', units='degrees_east')
486
	dateo = (pd.to_datetime(
487
			xr.conventions.decode_cf_variable("date", sdday_xr.time).data[0])
488
				.strftime("%Y-%m-%d"))
489
	logging.debug("date %s dataset: %s", dateo, sdday_xr)
490
	return sdday_xr
491
492 View Code Duplication
def main():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
493
	logging.basicConfig(level=logging.WARNING,
494
			format="[%(levelname)-8s] (%(asctime)s) "
495
			"%(filename)s:%(lineno)d %(message)s",
496
			datefmt="%Y-%m-%d %H:%M:%S %z")
497
498
	parser = ap.ArgumentParser()
499
	parser.add_argument("file", default="SCIA_NO.nc", help="the filename of the output netcdf file")
500
	parser.add_argument("-M", "--month", metavar="YEAR-MM",
501
			help="infer start and end dates for month")
502
	parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE",
503
			help="colon-separated start and end dates")
504
	parser.add_argument("-d", "--dates", help="comma-separated list of dates")
505
	parser.add_argument("-f", "--orbit_file", help="the file containing the input orbits")
506
	parser.add_argument("-p", "--path", default=None, help="path containing the L2 data")
507
	parser.add_argument("-r", "--retrieval_version", default="v6.2",
508
			help="SCIAMACHY level 2 data version to process")
509
	parser.add_argument("-R", "--file_version", default="2.3",
510
			help="Postprocessing format version of the output file")
511
	parser.add_argument("-s", "--spectra", default=None,
512
			help="path containing the L1c spectra")
513
	parser.add_argument("-m", "--mlt", action="store_true", default=False,
514
			help="indicate nominal (False, default) or MLT data (True)")
515
	parser.add_argument("-X", "--xarray", action="store_true", default=False,
516
			help="use xarray to prepare the dataset (experimental, default %(default)s)")
517
	loglevels = parser.add_mutually_exclusive_group()
518
	loglevels.add_argument("-l", "--loglevel", default="WARNING",
519
			choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
520
			help="change the loglevel (default: 'WARNING')")
521
	loglevels.add_argument("-q", "--quiet", action="store_true", default=False,
522
			help="less output, same as --loglevel=ERROR (default: False)")
523
	loglevels.add_argument("-v", "--verbose", action="store_true", default=False,
524
			help="verbose output, same as --loglevel=INFO (default: False)")
525
	args = parser.parse_args()
526
	if args.quiet:
527
		logging.getLogger().setLevel(logging.ERROR)
528
	elif args.verbose:
529
		logging.getLogger().setLevel(logging.INFO)
530
	else:
531
		logging.getLogger().setLevel(args.loglevel)
532
533
	logging.info("processing L2 version: %s", args.retrieval_version)
534
	logging.info("writing data file version: %s", args.file_version)
535
536
	pddrange = []
537
	if args.month is not None:
538
		d0 = pd.to_datetime(args.month + "-01")
539
		pddrange += pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd())
540
	if args.date_range is not None:
541
		pddrange += pd.date_range(*args.date_range.split(':'))
542
	if args.dates is not None:
543
		pddrange += pd.to_datetime(args.dates.split(','))
544
	logging.debug("pddrange: %s", pddrange)
545
546
	olist = []
547
	for date in pddrange:
548
		try:
549
			olist += get_orbits_from_date(date.strftime("%Y-%m-%d"),
550
				mlt=args.mlt, path=args.path, L2_version=args.retrieval_version)
551
		except:  # handle NaT
552
			pass
553
	if args.orbit_file is not None:
554
		olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist()
555
	logging.debug("olist: %s", olist)
556
557
	if not olist:
558
		logging.warn("No orbits to process.")
559
		return
560
561
	sdlist, sdxr_ds = combine_orbit_data(olist,
562
			ref_date="2000-01-01",
563
			L2_version=args.retrieval_version, file_version=args.file_version,
564
			dens_path=args.path, spec_base=args.spectra, use_xarray=args.xarray,
565
			save_nc=False)
566
567
	if args.xarray and sdxr_ds is not None:
568
		sd_xr = sddata_xr_set_attrs(sdxr_ds, ref_date="2000-01-01")
569
		sd_xr2 = sdlist.to_xarray()
570
		logging.debug(sd_xr)
571
		logging.debug(sd_xr2)
572
		logging.debug("equal datasets: %s", sd_xr.equals(sd_xr2))
573
		xr.testing.assert_allclose(sd_xr, sd_xr2)
574
		if sd_xr2 is not None:
575
			logging.debug("xarray dataset: %s", sd_xr2)
576
			sd_xr2.to_netcdf(args.file, unlimited_dims=["time"])
577
	else:
578
		if sdlist.no_dens is not None:
579
			sdlist.write_to_netcdf(args.file)
580
		else:
581
			logging.warn("Processed data is empty.")
582
583
584
if __name__ == "__main__":
585
	main()
586