Total Complexity | 54 |
Total Lines | 630 |
Duplicated Lines | 83.97 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like scia_post_process_l2 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | #!/usr/bin/env python |
||
2 | # vim: set fileencoding=utf-8 |
||
3 | """SCIAMACHY level 2 data post processing |
||
4 | |||
5 | Main script for SCIAMACHY orbital retrieval post processing |
||
6 | and data combining (to netcdf). |
||
7 | |||
8 | Copyright (c) 2018 Stefan Bender |
||
9 | |||
10 | This file is part of sciapy. |
||
11 | sciapy is free software: you can redistribute it or modify it |
||
12 | under the terms of the GNU General Public License as published by |
||
13 | the Free Software Foundation, version 2. |
||
14 | See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html. |
||
15 | """ |
||
16 | |||
17 | from __future__ import absolute_import, division, print_function |
||
18 | |||
19 | import glob |
||
20 | import os |
||
21 | import argparse as ap |
||
22 | import datetime as dt |
||
23 | import logging |
||
24 | |||
25 | import numpy as np |
||
26 | import pandas as pd |
||
27 | import xarray as xr |
||
28 | from scipy.interpolate import interp1d |
||
29 | try: |
||
30 | import pysolar.solar as sol |
||
31 | sun_alt_func = sol.get_altitude |
||
32 | except ImportError: # pysolar 0.6 (for python 2) |
||
33 | import Pysolar.solar as sol |
||
34 | sun_alt_func = sol.GetAltitude |
||
35 | |||
36 | import sciapy.level1c as sl |
||
37 | from sciapy.level2 import density_pp as sd |
||
38 | from sciapy.level2 import scia_akm as sa |
||
39 | from sciapy.level2.igrf import gmag_igrf |
||
40 | from sciapy.level2.aacgm2005 import gmag_aacgm2005 |
||
41 | try: |
||
42 | from nrlmsise00 import msise_flat as msise |
||
43 | except ImportError: |
||
44 | msise = None |
||
45 | try: |
||
46 | from sciapy.level2.noem import noem_cpp |
||
47 | except ImportError: |
||
48 | noem_cpp = None |
||
49 | |||
50 | this_dir = os.path.realpath(os.path.dirname(__file__)) |
||
51 | f107_data = dict(np.genfromtxt( |
||
52 | os.path.join(this_dir, "../data/indices/f107_noontime_flux_obs.txt"), |
||
53 | usecols=[0, 2], dtype=None)) |
||
54 | f107a_data = dict(np.genfromtxt( |
||
55 | os.path.join(this_dir, "../data/indices/f107a_noontime_flux_obs.txt"), |
||
56 | usecols=[0, 2], dtype=None)) |
||
57 | ap_data = dict(np.genfromtxt( |
||
58 | os.path.join(this_dir, "../data/indices/spidr_ap_2000-2012.dat"), |
||
59 | usecols=[0, 2], dtype=None)) |
||
60 | f107_adj = dict(np.genfromtxt( |
||
61 | os.path.join(this_dir, "../data/indices/spidr_f107_2000-2012.dat"), |
||
62 | usecols=[0, 2], dtype=None)) |
||
63 | kp_data = dict(np.genfromtxt( |
||
64 | os.path.join(this_dir, "../data/indices/spidr_kp_2000-2012.dat"), |
||
65 | usecols=[0, 2], dtype=None)) |
||
66 | |||
67 | phi_fac = 11.91 |
||
68 | lst_fac = -0.62 |
||
69 | |||
70 | |||
71 | View Code Duplication | def read_spectra(year, orbit, spec_base=None, skip_upleg=True): |
|
|
|||
72 | """Read and examine SCIAMACHY orbit spectra |
||
73 | |||
74 | Reads the limb spactra and extracts the dates, times, latitudes, |
||
75 | longitudes to be used to re-assess the retrieved geolocations. |
||
76 | |||
77 | Parameters |
||
78 | ---------- |
||
79 | year: int |
||
80 | The measurement year to select the corresponding subdir |
||
81 | below `spec_base` (see below). |
||
82 | orbit: int |
||
83 | SCIAMACHY/Envisat orbit number of the spectra. |
||
84 | spec_base: str, optional |
||
85 | The root path to the level 1c spectra. Uses the current |
||
86 | dir if not set or set to `None` (default). |
||
87 | skip_upleg: bool, optional |
||
88 | Skip upleg limb scans, i.e. night time scans. For NO retrievals, |
||
89 | those are not used and should be not used here. |
||
90 | Default: True |
||
91 | |||
92 | Returns |
||
93 | ------- |
||
94 | (dts, times, lats, lons, mlsts, alsts, eotcorr) |
||
95 | """ |
||
96 | fail = (None,) * 7 |
||
97 | if spec_base is None: |
||
98 | spec_base = os.curdir |
||
99 | spec_path = "{0}/{1}".format(spec_base.rstrip('/'), year) |
||
100 | spec_path2 = "{0}/{1}".format(spec_base.rstrip('/'), int(year) + 1) |
||
101 | logging.debug("spec_path: %s", spec_path) |
||
102 | logging.debug("spec_path2: %s", spec_path) |
||
103 | if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)): |
||
104 | return fail |
||
105 | |||
106 | # the star stands for the (optional) date subdir |
||
107 | # to find all spectra for the orbit |
||
108 | spfiles = glob.glob( |
||
109 | '{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary' |
||
110 | .format(spec_path, orbit)) |
||
111 | # sometimes for whatever reason the orbit ends up in the wrong year subdir |
||
112 | # looks in the subdir for the following year as well. |
||
113 | spfiles += glob.glob( |
||
114 | '{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary' |
||
115 | .format(spec_path2, orbit)) |
||
116 | spdict = dict([(fn, os.path.basename(fn).split('_')[2:4]) |
||
117 | for fn in spfiles]) |
||
118 | logging.debug("spdict: %s", spdict) |
||
119 | if len(spfiles) < 2: |
||
120 | return fail |
||
121 | |||
122 | sorted_spdkeys = sorted(spdict.keys()) |
||
123 | |||
124 | slscans = [sl.scia_limb_scan() for _ in spdict] |
||
125 | [s.read_from_file(f) for s, f in zip(slscans, sorted_spdkeys)] |
||
126 | |||
127 | lsts = [(s.cent_lat_lon[:2], |
||
128 | s.local_solar_time(False), |
||
129 | s.limb_data.tp_lat) |
||
130 | for s in slscans] |
||
131 | lstdict = dict(zip(sorted_spdkeys, lsts)) |
||
132 | logging.debug("lstdict: %s", lstdict) |
||
133 | |||
134 | dts = [] |
||
135 | times = [] |
||
136 | lats = [] |
||
137 | lons = [] |
||
138 | mlsts = [] |
||
139 | alsts = [] |
||
140 | |||
141 | for key in sorted_spdkeys: |
||
142 | (lat, lon), (mlst, alst, eotcorr), tp_lats = lstdict[key] |
||
143 | logging.debug("lat: %s, lon: %s", lat, lon) |
||
144 | if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5): |
||
145 | # Exclude non-downleg measurements where the latitude |
||
146 | # of the last real tangent point (the last is dark sky) |
||
147 | # is larger than or too close to the first latitude. |
||
148 | # Requires an (empirical) separation of +0.5 degree. |
||
149 | logging.debug("excluding upleg point at: %s, %s", lat, lon) |
||
150 | continue |
||
151 | dtdate = pd.to_datetime(''.join(spdict[key]), |
||
152 | format="%Y%m%d%H%M%S", utc=True) |
||
153 | time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0 |
||
154 | logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr) |
||
155 | dts.append(dtdate) |
||
156 | times.append(time_hour) |
||
157 | lats.append(lat) |
||
158 | lons.append(lon) |
||
159 | mlsts.append(mlst) |
||
160 | alsts.append(alst) |
||
161 | |||
162 | if len(lats) < 2: |
||
163 | # interpolation will fail with less than 2 points |
||
164 | return fail |
||
165 | |||
166 | return (np.asarray(dts), |
||
167 | np.asarray(times), |
||
168 | np.asarray(lats), |
||
169 | np.asarray(lons), |
||
170 | np.asarray(mlsts), |
||
171 | np.asarray(alsts), eotcorr) |
||
172 | |||
173 | |||
174 | class _circ_interp(object): |
||
175 | """Interpolation on a circle""" |
||
176 | def __init__(self, x, y, **kw): |
||
177 | self.c_intpf = interp1d(x, np.cos(y), **kw) |
||
178 | self.s_intpf = interp1d(x, np.sin(y), **kw) |
||
179 | |||
180 | def __call__(self, x): |
||
181 | return np.arctan2(self.s_intpf(x), self.c_intpf(x)) |
||
182 | |||
183 | |||
184 | View Code Duplication | def process_orbit( |
|
185 | orbit, |
||
186 | ref_date="1950-01-01", |
||
187 | dens_path=None, |
||
188 | spec_base=None, |
||
189 | use_msis=True, |
||
190 | ): |
||
191 | """Post process retrieved SCIAMACHY orbit |
||
192 | |||
193 | Parameters |
||
194 | ---------- |
||
195 | orbit: int |
||
196 | SCIAMACHY/Envisat orbit number of the results to process. |
||
197 | ref_date: str, optional |
||
198 | Base date to calculate the relative days from, |
||
199 | of the format "%Y-%m-%d". Default: 1950-01-01 |
||
200 | dens_path: str, optional |
||
201 | The path to the level 2 data. Uses the current |
||
202 | dir if not set or set to `None` (default). |
||
203 | spec_base: str, optional |
||
204 | The root path to the level 1c spectra. Uses the current |
||
205 | dir if not set or set to `None` (default). |
||
206 | |||
207 | Returns |
||
208 | ------- |
||
209 | (dts0, time0, lst0, lon0, sdd): tuple |
||
210 | dts0 - days since ref_date at equator crossing (float) |
||
211 | time0 - utc hour into the day at equator crossing (float) |
||
212 | lst0 - apparent local solar time at the equator (float) |
||
213 | lon0 - longitude of the equator crossing (float) |
||
214 | sdd - `scia_density_pp` instance of the post-processed data |
||
215 | """ |
||
216 | fail = (None,) * 5 |
||
217 | logging.debug("processing orbit: %s", orbit) |
||
218 | dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True) |
||
219 | |||
220 | dfiles = glob.glob( |
||
221 | "{0}/000NO_orbit_{1:05d}_*_Dichten.txt" |
||
222 | .format(dens_path, orbit)) |
||
223 | if len(dfiles) < 1: |
||
224 | return fail |
||
225 | logging.debug("dfiles: %s", dfiles) |
||
226 | logging.debug("splits: %s", [fn.split('/') for fn in dfiles]) |
||
227 | ddict = dict([(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4])) |
||
228 | for fn in dfiles]) |
||
229 | logging.debug("ddict: %s", ddict) |
||
230 | year = ddict[sorted(ddict.keys())[0]][-1][:4] |
||
231 | logging.debug("year: %s", year) |
||
232 | |||
233 | dts, times, lats, lons, mlsts, alsts, eotcorr = \ |
||
234 | read_spectra(year, orbit, spec_base) |
||
235 | if dts is None: |
||
236 | # return early if reading the spectra failed |
||
237 | return fail |
||
238 | |||
239 | dts = pd.to_datetime(dts) - dtrefdate |
||
240 | dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts]) |
||
241 | logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times) |
||
242 | |||
243 | sdd = sd.scia_densities_pp(ref_date=ref_date) |
||
244 | sdd.read_from_file(dfiles[0]) |
||
245 | logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons) |
||
246 | |||
247 | # Re-interpolates the location (longitude) and times from the |
||
248 | # limb scan spectra files along the orbit to determine the values |
||
249 | # at the Equator and to fill in possibly missing data. |
||
250 | # |
||
251 | # y values are unit circle angles in radians (0 < φ < 2π or -π < φ < π) |
||
252 | # longitudes |
||
253 | lons_intpf = _circ_interp( |
||
254 | lats[::-1], np.radians(lons[::-1]), |
||
255 | fill_value="extrapolate", |
||
256 | ) |
||
257 | # apparent local solar time (EoT corrected) |
||
258 | lst_intpf = _circ_interp( |
||
259 | lats[::-1], np.pi / 12. * alsts[::-1], |
||
260 | fill_value="extrapolate", |
||
261 | ) |
||
262 | # mean local solar time |
||
263 | mst_intpf = _circ_interp( |
||
264 | lats[::-1], np.pi / 12. * mlsts[::-1], |
||
265 | fill_value="extrapolate", |
||
266 | ) |
||
267 | # utc time (day) |
||
268 | time_intpf = _circ_interp( |
||
269 | lats[::-1], np.pi / 12. * times[::-1], |
||
270 | fill_value="extrapolate", |
||
271 | ) |
||
272 | # datetime |
||
273 | dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate") |
||
274 | |||
275 | # equator values |
||
276 | lon0 = np.degrees(lons_intpf(0.)) % 360. |
||
277 | lst0 = (lst_intpf(0.) * 12. / np.pi) % 24. |
||
278 | mst0 = (mst_intpf(0.) * 12. / np.pi) % 24. |
||
279 | time0 = (time_intpf(0.) * 12. / np.pi) % 24. |
||
280 | dts_retr_interp0 = dts_retr_interpf(0.) |
||
281 | logging.debug("utc day at equator: %s", dts_retr_interp0) |
||
282 | logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0) |
||
283 | |||
284 | sdd.utchour = (time_intpf(sdd.lats) * 12. / np.pi) % 24. |
||
285 | sdd.utcdays = dts_retr_interpf(sdd.lats) |
||
286 | |||
287 | if sdd.lons is None: |
||
288 | # recalculate the longitudes |
||
289 | # estimate the equatorial longitude from the |
||
290 | # limb scan latitudes and longitudes |
||
291 | lon0s_tp = lons - phi_fac * np.tan(np.radians(lats)) |
||
292 | clon0s_tp = np.cos(np.radians(lon0s_tp)) |
||
293 | slon0s_tp = np.sin(np.radians(lon0s_tp)) |
||
294 | lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1])) |
||
295 | lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi)) |
||
296 | logging.info("lon0: %s", lon0) |
||
297 | logging.info("lon0 tp: %s", lon0_tp) |
||
298 | # interpolate to the retrieval latitudes |
||
299 | tg_retr_lats = np.tan(np.radians(sdd.lats)) |
||
300 | calc_lons = (tg_retr_lats * phi_fac + lon0) % 360. |
||
301 | calc_lons_tp = (tg_retr_lats * phi_fac + lon0_tp) % 360. |
||
302 | sdd.lons = calc_lons_tp |
||
303 | logging.debug("(calculated) retrieval lons: %s, %s", |
||
304 | calc_lons, calc_lons_tp) |
||
305 | else: |
||
306 | # sdd.lons = sdd.lons % 360. |
||
307 | logging.debug("(original) retrieval lons: %s", sdd.lons) |
||
308 | |||
309 | sdd.mst = (sdd.utchour + sdd.lons / 15.) % 24. |
||
310 | sdd.lst = sdd.mst + eotcorr / 60. |
||
311 | |||
312 | dt_date_this = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate |
||
313 | logging.info("date: %s", dt_date_this) |
||
314 | # caclulate geomagnetic coordinates |
||
315 | sdd.gmlats, sdd.gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=100.) |
||
316 | logging.debug("geomag. lats: %s, lons: %s", sdd.gmlats, sdd.gmlons) |
||
317 | sdd.aacgmgmlats, sdd.aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons) |
||
318 | logging.debug("aacgm geomag. lats: %s, lons: %s", |
||
319 | sdd.aacgmgmlats, sdd.aacgmgmlons) |
||
320 | |||
321 | # current day for MSIS input |
||
322 | msis_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate |
||
323 | msis_dtdate1 = msis_dtdate - dt.timedelta(days=1) |
||
324 | msis_date = msis_dtdate.strftime("%Y-%m-%d").encode() |
||
325 | msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode() |
||
326 | msis_f107 = f107_data[msis_date1] |
||
327 | msis_f107a = f107a_data[msis_date] |
||
328 | msis_ap = ap_data[msis_date] |
||
329 | logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s", |
||
330 | msis_date, msis_f107a, msis_f107, msis_ap) |
||
331 | |||
332 | # previous day for NOEM input |
||
333 | noem_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0) - 1) + dtrefdate |
||
334 | noem_date = noem_dtdate.strftime("%Y-%m-%d").encode() |
||
335 | noem_f107 = f107_adj[noem_date] |
||
336 | noem_kp = kp_data[noem_date] |
||
337 | logging.debug("NOEM date: %s, f10.7: %s, kp: %s", |
||
338 | noem_date, noem_f107, noem_kp) |
||
339 | |||
340 | if sdd.noem_no is None: |
||
341 | sdd.noem_no = np.zeros_like(sdd.densities) |
||
342 | if sdd.temperature is None and msise is None: |
||
343 | sdd.temperature = np.full_like(sdd.densities, np.nan) |
||
344 | if sdd.sza is None: |
||
345 | sdd.sza = np.zeros_like(sdd.lats) |
||
346 | if sdd.akdiag is None: |
||
347 | sdd.akdiag = np.zeros_like(sdd.densities) |
||
348 | akm_filename = glob.glob( |
||
349 | "{0}/000NO_orbit_{1:05d}_*_AKM*" |
||
350 | .format(dens_path, orbit))[0] |
||
351 | logging.debug("ak file: %s", akm_filename) |
||
352 | ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat) |
||
353 | logging.debug("ak data: %s", ak) |
||
354 | sdd.akdiag = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1) |
||
355 | |||
356 | if msise is not None: |
||
357 | if sdd.temperature is None or use_msis: |
||
358 | _msis_d_t = msise( |
||
359 | msis_dtdate, |
||
360 | sdd.alts[None, :], sdd.lats[:, None], sdd.lons[:, None] % 360., |
||
361 | msis_f107a, msis_f107, msis_ap, |
||
362 | lst=sdd.lst[:, None], |
||
363 | ) |
||
364 | sdd.temperature = _msis_d_t[:, :, -1] |
||
365 | if use_msis: |
||
366 | sdd.dens_tot = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2) |
||
367 | for i, lat in enumerate(sdd.lats): |
||
368 | if noem_cpp is not None: |
||
369 | sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts, |
||
370 | [lat], [sdd.lons[i]], noem_f107, noem_kp)[:] |
||
371 | else: |
||
372 | sdd.noem_no[i][:] = np.nan |
||
373 | sdd.sza[i] = 90. - sun_alt_func(lat, sdd.lons[i], |
||
374 | dt.timedelta(np.asscalar(sdd.utcdays[i])) + dtrefdate, |
||
375 | elevation=sdd.alts.mean() * 1000.) |
||
376 | sdd.vmr = sdd.densities / sdd.dens_tot * 1.e9 # ppb |
||
377 | return dts_retr_interp0, time0, lst0, lon0, sdd |
||
378 | |||
379 | |||
380 | View Code Duplication | def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"): |
|
381 | """Find SCIAMACHY orbits with retrieved data at a date |
||
382 | |||
383 | Parameters |
||
384 | ---------- |
||
385 | date: str |
||
386 | The date in the format "%Y-%m-%d". |
||
387 | mlt: bool, optional |
||
388 | Look for MLT mode data instead of nominal mode data. |
||
389 | Increases the heuristics to find all MLT orbits. |
||
390 | path: str, optional |
||
391 | The path to the level 2 data. If `None` tries to infer |
||
392 | the data directory from the L2 version using |
||
393 | './*<L2_version>'. Default: None |
||
394 | |||
395 | Returns |
||
396 | ------- |
||
397 | orbits: list |
||
398 | List of found orbits with retrieved data files |
||
399 | """ |
||
400 | logging.debug("pre-processing: %s", date) |
||
401 | if path is None: |
||
402 | density_base = os.curdir |
||
403 | path = "{0}/*{1}".format(density_base, L2_version) |
||
404 | logging.debug("path: %s", path) |
||
405 | |||
406 | dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format( |
||
407 | path, date.replace("-", ""))) |
||
408 | orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles]) |
||
409 | if mlt: |
||
410 | orbits.append(orbits[-1] + 1) |
||
411 | return orbits |
||
412 | |||
413 | |||
414 | View Code Duplication | def combine_orbit_data(orbits, |
|
415 | ref_date="1950-01-01", |
||
416 | L2_version="v6.2", file_version="2.3", |
||
417 | dens_path=None, spec_base=None, |
||
418 | use_xarray=False, save_nc=False): |
||
419 | """Combine post-processed SCIAMACHY retrieved orbit data |
||
420 | |||
421 | Parameters |
||
422 | ---------- |
||
423 | orbits: list |
||
424 | List of SCIAMACHY/Envisat orbit numbers to process. |
||
425 | ref_date: str, optional |
||
426 | Base date to calculate the relative days from, |
||
427 | of the format "%Y-%m-%d". Default: 1950-01-01 |
||
428 | L2_version: str, optional |
||
429 | SCIAMACHY level 2 data version to process |
||
430 | file_version: str, optional |
||
431 | Postprocessing format version of the output data |
||
432 | dens_path: str, optional |
||
433 | The path to the level 2 data. If `None` tries to infer |
||
434 | the data directory from the L2 version looking for anything |
||
435 | in the current directory that ends in <L2_version>: './*<L2_version>'. |
||
436 | Default: None |
||
437 | spec_base: str, optional |
||
438 | The root path to the level 1c spectra. Uses the current |
||
439 | dir if not set or set to `None` (default). |
||
440 | use_xarray: bool, optional |
||
441 | Uses xarray (if available) to combine the orbital data. |
||
442 | save_nc: bool, optional |
||
443 | Save the intermediate orbit data sets to netcdf files |
||
444 | for debugging. |
||
445 | |||
446 | Returns |
||
447 | ------- |
||
448 | (sdday, sdday_ds): tuple |
||
449 | `sdday` contains the combined data as a `scia_density_day` instance, |
||
450 | `sdday_ds` contains the same data as a `xarray.Dataset`. |
||
451 | """ |
||
452 | if dens_path is None: |
||
453 | # try some heuristics |
||
454 | density_base = os.curdir |
||
455 | dens_path = "{0}/*{1}".format(density_base, L2_version) |
||
456 | |||
457 | sdday = sd.scia_density_day(ref_date=ref_date) |
||
458 | sddayl = [] |
||
459 | sdday_ds = None |
||
460 | for orbit in sorted(orbits): |
||
461 | dateo, timeo, lsto, lono, sdens = process_orbit(orbit, |
||
462 | ref_date=ref_date, dens_path=dens_path, spec_base=spec_base) |
||
463 | logging.info( |
||
464 | "orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s", |
||
465 | orbit, dateo, timeo, lsto, lono |
||
466 | ) |
||
467 | if sdens is not None: |
||
468 | sdens.version = file_version |
||
469 | sdens.data_version = L2_version |
||
470 | sdday.append_data(dateo, orbit, timeo, sdens) |
||
471 | if use_xarray: |
||
472 | sd_xr = sdens.to_xarray(dateo, orbit) |
||
473 | if sd_xr is not None: |
||
474 | logging.debug("orbit %s dataset: %s", orbit, sd_xr) |
||
475 | sddayl.append(sd_xr) |
||
476 | if save_nc: |
||
477 | sdens.write_to_netcdf(sdens.filename[:-3] + "nc") |
||
478 | if use_xarray and sddayl: |
||
479 | sdday_ds = xr.concat(sddayl, dim="time") |
||
480 | return sdday, sdday_ds |
||
481 | |||
482 | |||
483 | View Code Duplication | def sddata_xr_set_attrs(sdday_xr, ref_date="1950-01-01", rename=True): |
|
484 | """Customize xarray Dataset variables and attributes |
||
485 | |||
486 | Changes the variable names to match those exported from the |
||
487 | `scia_density_day` class. |
||
488 | |||
489 | Parameters |
||
490 | ---------- |
||
491 | sdday_xr: xarray.Dataset instance |
||
492 | ref_date: str, optional |
||
493 | Base date to calculate the relative days from, |
||
494 | of the format "%Y-%m-%d". Default: 1950-01-01 |
||
495 | rename: bool, optional |
||
496 | Rename the dataset variables to match the |
||
497 | `scia_density_day` exported ones. |
||
498 | """ |
||
499 | if rename: |
||
500 | sdday_xr = sdday_xr.rename({ |
||
501 | "density": "NO_DENS", "density_air": "TOT_DENS", |
||
502 | "apriori": "NO_APRIORI", "error_meas": "NO_ERR", |
||
503 | "error_tot": "NO_ETOT", |
||
504 | "NOEM_density": "NO_NOEM", "akm_diagonal": "NO_AKDIAG", |
||
505 | "VMR": "NO_VMR", |
||
506 | "utc_hour": "UTC", "mean_sza": "mean_SZA", |
||
507 | "app_lst": "app_LST", "mean_lst": "mean_LST", |
||
508 | }) |
||
509 | sdday_xr["NO_RSTD"] = 100 * np.abs(sdday_xr.NO_ERR / sdday_xr.NO_DENS) |
||
510 | sdday_xr["NO_RSTD"].attrs = dict(units='%', |
||
511 | long_name='NO relative standard deviation') |
||
512 | # fix coordinate attributes |
||
513 | sdday_xr["time"].attrs = dict(axis='T', standard_name='time', |
||
514 | calendar='standard', long_name='equatorial crossing time', |
||
515 | units="days since {0}".format( |
||
516 | pd.to_datetime(ref_date).isoformat(sep=" "))) |
||
517 | sdday_xr["orbit"].attrs = dict(axis='T', calendar='standard', |
||
518 | long_name='SCIAMACHY/Envisat orbit number', units='1') |
||
519 | sdday_xr["altitude"].attrs = dict(axis='Z', positive='up', |
||
520 | long_name='altitude', standard_name='altitude', units='km') |
||
521 | sdday_xr["latitude"].attrs = dict(axis='Y', long_name='latitude', |
||
522 | standard_name='latitude', units='degrees_north') |
||
523 | sdday_xr["longitude"].attrs = dict(long_name='longitude', |
||
524 | standard_name='longitude', units='degrees_east') |
||
525 | dateo = (pd.to_datetime( |
||
526 | xr.conventions.decode_cf_variable("date", sdday_xr.time).data[0]) |
||
527 | .strftime("%Y-%m-%d")) |
||
528 | logging.debug("date %s dataset: %s", dateo, sdday_xr) |
||
529 | return sdday_xr |
||
530 | |||
531 | |||
532 | View Code Duplication | def main(): |
|
533 | logging.basicConfig(level=logging.WARNING, |
||
534 | format="[%(levelname)-8s] (%(asctime)s) " |
||
535 | "%(filename)s:%(lineno)d %(message)s", |
||
536 | datefmt="%Y-%m-%d %H:%M:%S %z") |
||
537 | |||
538 | parser = ap.ArgumentParser() |
||
539 | parser.add_argument("file", default="SCIA_NO.nc", |
||
540 | help="the filename of the output netcdf file") |
||
541 | parser.add_argument("-M", "--month", metavar="YEAR-MM", |
||
542 | help="infer start and end dates for month") |
||
543 | parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE", |
||
544 | help="colon-separated start and end dates") |
||
545 | parser.add_argument("-d", "--dates", help="comma-separated list of dates") |
||
546 | parser.add_argument("-f", "--orbit_file", |
||
547 | help="the file containing the input orbits") |
||
548 | parser.add_argument("-p", "--path", default=None, |
||
549 | help="path containing the L2 data") |
||
550 | parser.add_argument("-r", "--retrieval_version", default="v6.2", |
||
551 | help="SCIAMACHY level 2 data version to process") |
||
552 | parser.add_argument("-R", "--file_version", default="2.3", |
||
553 | help="Postprocessing format version of the output file") |
||
554 | parser.add_argument("-s", "--spectra", default=None, |
||
555 | help="path containing the L1c spectra") |
||
556 | parser.add_argument("-m", "--mlt", action="store_true", default=False, |
||
557 | help="indicate nominal (False, default) or MLT data (True)") |
||
558 | parser.add_argument("-X", "--xarray", action="store_true", default=False, |
||
559 | help="use xarray to prepare the dataset" |
||
560 | " (experimental, default %(default)s)") |
||
561 | loglevels = parser.add_mutually_exclusive_group() |
||
562 | loglevels.add_argument("-l", "--loglevel", default="WARNING", |
||
563 | choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], |
||
564 | help="change the loglevel (default: 'WARNING')") |
||
565 | loglevels.add_argument("-q", "--quiet", action="store_true", default=False, |
||
566 | help="less output, same as --loglevel=ERROR (default: False)") |
||
567 | loglevels.add_argument("-v", "--verbose", action="store_true", default=False, |
||
568 | help="verbose output, same as --loglevel=INFO (default: False)") |
||
569 | args = parser.parse_args() |
||
570 | if args.quiet: |
||
571 | logging.getLogger().setLevel(logging.ERROR) |
||
572 | elif args.verbose: |
||
573 | logging.getLogger().setLevel(logging.INFO) |
||
574 | else: |
||
575 | logging.getLogger().setLevel(args.loglevel) |
||
576 | |||
577 | logging.info("processing L2 version: %s", args.retrieval_version) |
||
578 | logging.info("writing data file version: %s", args.file_version) |
||
579 | |||
580 | pddrange = [] |
||
581 | if args.month is not None: |
||
582 | d0 = pd.to_datetime(args.month + "-01") |
||
583 | pddrange += pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd()) |
||
584 | if args.date_range is not None: |
||
585 | pddrange += pd.date_range(*args.date_range.split(':')) |
||
586 | if args.dates is not None: |
||
587 | pddrange += pd.to_datetime(args.dates.split(',')) |
||
588 | logging.debug("pddrange: %s", pddrange) |
||
589 | |||
590 | olist = [] |
||
591 | for date in pddrange: |
||
592 | try: |
||
593 | olist += get_orbits_from_date(date.strftime("%Y-%m-%d"), |
||
594 | mlt=args.mlt, path=args.path, L2_version=args.retrieval_version) |
||
595 | except: # handle NaT |
||
596 | pass |
||
597 | if args.orbit_file is not None: |
||
598 | olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist() |
||
599 | logging.debug("olist: %s", olist) |
||
600 | |||
601 | if not olist: |
||
602 | logging.warn("No orbits to process.") |
||
603 | return |
||
604 | |||
605 | sdlist, sdxr_ds = combine_orbit_data(olist, |
||
606 | ref_date="2000-01-01", |
||
607 | L2_version=args.retrieval_version, file_version=args.file_version, |
||
608 | dens_path=args.path, spec_base=args.spectra, use_xarray=args.xarray, |
||
609 | save_nc=False) |
||
610 | |||
611 | if args.xarray and sdxr_ds is not None: |
||
612 | sd_xr = sddata_xr_set_attrs(sdxr_ds, ref_date="2000-01-01") |
||
613 | sd_xr2 = sdlist.to_xarray() |
||
614 | logging.debug(sd_xr) |
||
615 | logging.debug(sd_xr2) |
||
616 | logging.debug("equal datasets: %s", sd_xr.equals(sd_xr2)) |
||
617 | xr.testing.assert_allclose(sd_xr, sd_xr2) |
||
618 | if sd_xr2 is not None: |
||
619 | logging.debug("xarray dataset: %s", sd_xr2) |
||
620 | sd_xr2.to_netcdf(args.file, unlimited_dims=["time"]) |
||
621 | else: |
||
622 | if sdlist.no_dens is not None: |
||
623 | sdlist.write_to_netcdf(args.file) |
||
624 | else: |
||
625 | logging.warn("Processed data is empty.") |
||
626 | |||
627 | |||
628 | if __name__ == "__main__": |
||
629 | main() |
||
630 |