Total Complexity | 57 |
Total Lines | 848 |
Duplicated Lines | 18.75 % |
Coverage | 86.78% |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like sciapy.level2.post_process often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | #!/usr/bin/env python |
||
2 | # vim:fileencoding=utf-8 |
||
3 | # |
||
4 | # Copyright (c) 2018 Stefan Bender |
||
5 | # |
||
6 | # This file is part of sciapy. |
||
7 | # sciapy is free software: you can redistribute it or modify it |
||
8 | # under the terms of the GNU General Public License as published by |
||
9 | # the Free Software Foundation, version 2. |
||
10 | # See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html. |
||
11 | 1 | """SCIAMACHY level 2 data post processing |
|
12 | |||
13 | Main script for SCIAMACHY orbital retrieval post processing |
||
14 | and data combining (to netcdf). |
||
15 | """ |
||
16 | |||
17 | 1 | from __future__ import absolute_import, division, print_function |
|
18 | |||
19 | 1 | import glob |
|
20 | 1 | import os |
|
21 | 1 | import argparse as ap |
|
22 | 1 | import datetime as dt |
|
23 | 1 | import logging |
|
24 | 1 | from pkg_resources import resource_filename |
|
25 | |||
26 | 1 | import numpy as np |
|
27 | 1 | import pandas as pd |
|
28 | 1 | import xarray as xr |
|
29 | 1 | from scipy.interpolate import interp1d |
|
30 | #import aacgmv2 |
||
31 | #import apexpy |
||
32 | |||
33 | 1 | from astropy import units |
|
34 | 1 | from astropy.time import Time |
|
35 | 1 | from astropy.utils import iers |
|
36 | 1 | import astropy.coordinates as coord |
|
37 | |||
38 | 1 | import sciapy.level1c as sl |
|
39 | 1 | from .. import __version__ |
|
40 | 1 | from . import scia_akm as sa |
|
41 | 1 | from .igrf import gmag_igrf |
|
42 | 1 | from .aacgm2005 import gmag_aacgm2005 |
|
43 | 1 | try: |
|
44 | 1 | from nrlmsise00 import msise_flat as msise |
|
45 | except ImportError: |
||
46 | msise = None |
||
47 | 1 | try: |
|
48 | 1 | from .noem import noem_cpp |
|
49 | 1 | except ImportError: |
|
50 | 1 | noem_cpp = None |
|
51 | |||
# Proxy-index data files shipped with the package, used as inputs for
# NRLMSISE-00 (f10.7, f10.7a, Ap) and NOEM (adjusted f10.7, Kp) below.
F107_FILE = resource_filename("sciapy", "data/indices/f107_noontime_flux_obs.txt")
F107A_FILE = resource_filename("sciapy", "data/indices/f107a_noontime_flux_obs.txt")
AP_FILE = resource_filename("sciapy", "data/indices/spidr_ap_2000-2012.dat")
F107_ADJ_FILE = resource_filename("sciapy", "data/indices/spidr_f107_2000-2012.dat")
KP_FILE = resource_filename("sciapy", "data/indices/spidr_kp_2000-2012.dat")

# Empirical factor relating the tangent of the latitude to the longitude
# drift along the orbit; used in `process_orbit` to reconstruct longitudes.
PHI_FAC = 11.91
# NOTE(review): LST_FAC appears unused in this module -- confirm before removing.
LST_FAC = -0.62

# Do not download IERS tables automatically; rely on the bundled data.
iers.conf.auto_download = False
iers.conf.auto_max_age = None
# Use a mirror page for the astropy time data, see
# https://github.com/mzechmeister/serval/issues/33#issuecomment-551156361
# and
# https://github.com/astropy/astropy/issues/8981#issuecomment-523984247
iers.conf.iers_auto_url = "https://astroconda.org/aux/astropy_mirror/iers_a_1/finals2000A.all"
|
68 | |||
69 | |||
def solar_zenith_angle(alt, lat, lon, time):
    """Solar zenith angle at the given location(s) and time(s).

    Transforms the Sun's position into the horizontal (alt-az) frame
    at the given geodetic location(s) and returns the zenith distance.

    Parameters
    ----------
    alt: float or array_like
        Altitude(s) in km.
    lat, lon: float or array_like
        Geodetic latitude(s) and longitude(s) in degrees.
    time: datetime-like or array_like
        Observation time(s), anything `astropy.time.Time` accepts.

    Returns
    -------
    zen: float or array_like
        The solar zenith angle(s) (degrees, as returned by astropy).
    """
    obstime = Time(time)
    location = coord.EarthLocation.from_geodetic(
        lat=lat * units.deg,
        lon=lon * units.deg,
        height=alt * units.km,
    )
    horizontal_frame = coord.AltAz(location=location, obstime=obstime)
    sun_pos = coord.get_sun(obstime)
    return sun_pos.transform_to(horizontal_frame).zen.value
80 | |||
81 | |||
def read_spectra(year, orbit, spec_base=None, skip_upleg=True):
    """Read and examine SCIAMACHY orbit spectra

    Reads the limb spectra and extracts the dates, times, latitudes,
    longitudes to be used to re-assess the retrieved geolocations.

    Parameters
    ----------
    year: int
        The measurement year to select the corresponding subdir
        below `spec_base` (see below).
    orbit: int
        SCIAMACHY/Envisat orbit number of the spectra.
    spec_base: str, optional
        The root path to the level 1c spectra. Uses the current
        dir if not set or set to `None` (default).
    skip_upleg: bool, optional
        Skip upleg limb scans, i.e. night time scans. For NO retrievals,
        those are not used and should be not used here.
        Default: True

    Returns
    -------
    (dts, times, lats, lons, mlsts, alsts, eotcorr)
        Arrays of the scan datetimes, fractional utc hours, latitudes,
        longitudes, mean and apparent local solar times, plus the
        equation-of-time correction of the last scan read.
        All seven entries are `None` on failure (no spectra found,
        or fewer than two usable scans).
    """
    fail = (None,) * 7
    if spec_base is None:
        spec_base = os.curdir
    # spectra are sorted into per-year subdirectories
    spec_path = os.path.join(spec_base, "{0}".format(year))
    spec_path2 = os.path.join(spec_base, "{0}".format(int(year) + 1))
    logging.debug("spec_path: %s", spec_path)
    # NOTE(review): logs `spec_path` again; probably meant `spec_path2` here.
    logging.debug("spec_path2: %s", spec_path)
    if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)):
        return fail

    # the star stands for the (optional) date subdir
    # to find all spectra for the orbit
    spfiles = glob.glob(
        '{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
        .format(spec_path, orbit))
    # sometimes for whatever reason the orbit ends up in the wrong year subdir
    # looks in the subdir for the following year as well.
    spfiles += glob.glob(
        '{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary'
        .format(spec_path2, orbit))
    if len(spfiles) < 2:
        return fail

    dts = []
    times = []
    lats = []
    lons = []
    mlsts = []
    alsts = []

    sls = sl.scia_limb_scan()
    for f in sorted(spfiles):
        sls.read_from_file(f)
        # copy the values from the l1c file
        lat, lon = sls.cent_lat_lon[:2]
        mlst, alst, eotcorr = sls.local_solar_time(False)
        tp_lats = sls.limb_data.tp_lat
        date = sls.date
        # debug output if requested
        logging.debug("file: %s", f)
        logging.debug("lat: %s, lon: %s", lat, lon)
        logging.debug("mlst: %s, alst: %s, eotcorr: %s", mlst, alst, eotcorr)
        logging.debug("tp_lats: %s", tp_lats)
        logging.debug("date: %s", date)
        if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5):
            # Exclude non-downleg measurements where the latitude
            # of the last real tangent point (the last is dark sky)
            # is larger than or too close to the first latitude.
            # Requires an (empirical) separation of +0.5 degree.
            logging.debug("excluding upleg point at: %s, %s", lat, lon)
            continue
        dtdate = pd.to_datetime(dt.datetime(*date), utc=True)
        # fractional utc hour of the scan
        time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0
        logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr)
        dts.append(dtdate)
        times.append(time_hour)
        lats.append(lat)
        lons.append(lon)
        mlsts.append(mlst)
        alsts.append(alst)

    if len(lats) < 2:
        # interpolation will fail with less than 2 points
        return fail

    # eotcorr is taken from the last scan processed in the loop above
    return (np.asarray(dts),
        np.asarray(times),
        np.asarray(lats),
        np.asarray(lons),
        np.asarray(mlsts),
        np.asarray(alsts), eotcorr)
178 | |||
179 | |||
def _get_orbit_ds(filename):
    """Load a single-orbit retrieval text file into an `xarray.Dataset`.

    Peeks at the first line to determine the retrieval version from the
    number of columns, reads the whitespace-separated table with pandas,
    and reshapes it onto a (lats, alts) grid.

    Parameters
    ----------
    filename: str
        Path to the `*_Dichten.txt` level 2 retrieval file.

    Returns
    -------
    sdd_ds: `xarray.Dataset`
        The orbit data on a (lats, alts) grid; "lons" is reduced to a
        1-d per-latitude variable (mean over altitudes).
    """
    # column names for the latest format, >= 1.5 (NO-v1.5)
    columns = [
        "id", "alt_max", "alt", "alt_min",
        "lat_max", "lat", "lat_min", "lons", "densities",
        "dens_err_meas", "dens_err_tot", "dens_tot", "apriori", "akdiag",
    ]
    # peek at the first line to extract the number of columns
    with open(filename, 'rb') as _f:
        ncols = len(_f.readline().split())
    # reduce the columns depending on the retrieval version
    # default is >= 1.5 (NO-v1.5)
    if ncols < 16:  # < 1.5 (NO_emiss-183-gcaa9349)
        columns.remove("akdiag")
        if ncols < 15:  # < 1.0 (NO_emiss-178-g729efb0)
            columns.remove("apriori")
            if ncols < 14:  # initial output << v1.0
                columns.remove("lons")
    # raw string for the regex separator; the previous '\s+' literal is an
    # invalid escape sequence (SyntaxWarning on Python >= 3.12)
    sdd_pd = pd.read_table(filename, header=None, names=columns, skiprows=1, sep=r'\s+')
    sdd_pd = sdd_pd.set_index("id")
    logging.debug("orbit ds: %s", sdd_pd.to_xarray())
    # stack the flat table onto a (lats, alts) grid
    ind = pd.MultiIndex.from_arrays(
        [sdd_pd.lat, sdd_pd.alt],
        names=["lats", "alts"],
    )
    sdd_ds = xr.Dataset.from_dataframe(sdd_pd).assign(id=ind).unstack("id")
    logging.debug("orbit dataset: %s", sdd_ds)
    # longitudes do not vary with altitude, keep one value per latitude
    sdd_ds["lons"] = sdd_ds.lons.mean("alts")
    sdd_ds.load()
    logging.debug("orbit ds 2: %s", sdd_ds.stack(id=["lats", "alts"]).reset_index("id"))
    return sdd_ds
211 | |||
212 | |||
213 | 1 | class _circ_interp(object): |
|
214 | """Interpolation on a circle""" |
||
215 | 1 | def __init__(self, x, y, **kw): |
|
216 | 1 | self.c_intpf = interp1d(x, np.cos(y), **kw) |
|
217 | 1 | self.s_intpf = interp1d(x, np.sin(y), **kw) |
|
218 | |||
219 | 1 | def __call__(self, x): |
|
220 | 1 | return np.arctan2(self.s_intpf(x), self.c_intpf(x)) |
|
221 | |||
222 | |||
def process_orbit(
        orbit,
        ref_date="2000-01-01",
        dens_path=None,
        spec_base=None,
        use_msis=True,
):
    """Post process retrieved SCIAMACHY orbit

    Parameters
    ----------
    orbit: int
        SCIAMACHY/Envisat orbit number of the results to process.
    ref_date: str, optional
        Base date to calculate the relative days from,
        of the format "%Y-%m-%d". Default: 2000-01-01
    dens_path: str, optional
        The path to the level 2 data. Uses the current
        dir if not set or set to `None` (default).
    spec_base: str, optional
        The root path to the level 1c spectra. Uses the current
        dir if not set or set to `None` (default).
    use_msis: bool, optional
        Overwrite "temperature" and "dens_tot" with NRLMSISE-00 values
        even when they are already present in the data. Default: True

    Returns
    -------
    (dts0, time0, lst0, lon0, sdd): tuple
        dts0 - days since ref_date at equator crossing (float)
        time0 - utc hour into the day at equator crossing (float)
        lst0 - apparent local solar time at the equator (float)
        lon0 - longitude of the equator crossing (float)
        sdd - `xarray.Dataset` of the post-processed data
        All five entries are `None` on failure.
    """
    def _read_gm(fname):
        # proxy-index file -> {date (bytes): value} mapping
        return dict(np.genfromtxt(fname, usecols=[0, 2], dtype=None))

    fail = (None,) * 5
    logging.debug("processing orbit: %s", orbit)
    dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True)
    logging.debug("ref date: %s", dtrefdate)

    # find the retrieval result file(s) for this orbit
    dfiles = glob.glob(
        "{0}/000NO_orbit_{1:05d}_*_Dichten.txt"
        .format(dens_path, orbit))
    if len(dfiles) < 1:
        return fail
    logging.debug("dfiles: %s", dfiles)
    logging.debug("splits: %s", [fn.split('/') for fn in dfiles])
    # map each file to its path pieces; the last entry carries the date part
    ddict = dict([
        (fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4]))
        for fn in dfiles
    ])
    logging.debug("ddict: %s", ddict)
    # measurement year taken from the date part of the (first) file name
    year = ddict[sorted(ddict.keys())[0]][-1][:4]
    logging.debug("year: %s", year)

    dts, times, lats, lons, mlsts, alsts, eotcorr = \
        read_spectra(year, orbit, spec_base)
    if dts is None:
        # return early if reading the spectra failed
        return fail

    # convert datetimes to fractional days since the reference date
    dts = pd.to_datetime(dts, utc=True) - dtrefdate
    dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts])
    logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times)

    sdd = _get_orbit_ds(dfiles[0])
    logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons)

    # Re-interpolates the location (longitude) and times from the
    # limb scan spectra files along the orbit to determine the values
    # at the Equator and to fill in possibly missing data.
    #
    # y values are unit circle angles in radians (0 < phi < 2 pi or -pi < phi < pi)
    # longitudes
    lons_intpf = _circ_interp(
        lats[::-1], np.radians(lons[::-1]),
        fill_value="extrapolate",
    )
    # apparent local solar time (EoT corrected)
    lst_intpf = _circ_interp(
        lats[::-1], np.pi / 12. * alsts[::-1],
        fill_value="extrapolate",
    )
    # mean local solar time
    mst_intpf = _circ_interp(
        lats[::-1], np.pi / 12. * mlsts[::-1],
        fill_value="extrapolate",
    )
    # utc time (day)
    time_intpf = _circ_interp(
        lats[::-1], np.pi / 12. * times[::-1],
        fill_value="extrapolate",
    )
    # datetime
    dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate")

    # equator values (latitude 0)
    lon0 = np.degrees(lons_intpf(0.)) % 360.
    lst0 = (lst_intpf(0.) * 12. / np.pi) % 24.
    mst0 = (mst_intpf(0.) * 12. / np.pi) % 24.
    time0 = (time_intpf(0.) * 12. / np.pi) % 24.
    dts_retr_interp0 = dts_retr_interpf(0.)
    logging.debug("utc day at equator: %s", dts_retr_interp0)
    logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0)

    # interpolate the times onto the retrieval latitude grid
    sdd["utc_hour"] = ("lats", (time_intpf(sdd.lats) * 12. / np.pi) % 24.)
    sdd["utc_days"] = ("lats", dts_retr_interpf(sdd.lats))

    if "lons" not in sdd.data_vars:
        # recalculate the longitudes
        # estimate the equatorial longitude from the
        # limb scan latitudes and longitudes
        lon0s_tp = lons - PHI_FAC * np.tan(np.radians(lats))
        clon0s_tp = np.cos(np.radians(lon0s_tp))
        slon0s_tp = np.sin(np.radians(lon0s_tp))
        # circular mean, skipping the first and last scans
        lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1]))
        lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi))
        logging.info("lon0: %s", lon0)
        logging.info("lon0 tp: %s", lon0_tp)
        # interpolate to the retrieval latitudes
        tg_retr_lats = np.tan(np.radians(sdd.lats))
        calc_lons = (tg_retr_lats * PHI_FAC + lon0) % 360.
        calc_lons_tp = (tg_retr_lats * PHI_FAC + lon0_tp) % 360.
        sdd["lons"] = calc_lons_tp
        logging.debug("(calculated) retrieval lons: %s, %s",
                calc_lons, calc_lons_tp)
    else:
        # sdd.lons = sdd.lons % 360.
        logging.debug("(original) retrieval lons: %s", sdd.lons)

    # mean and apparent local solar times on the retrieval grid
    sdd["mst"] = (sdd.utc_hour + sdd.lons / 15.) % 24.
    sdd["lst"] = sdd.mst + eotcorr / 60.
    mean_alt_km = sdd.alts.mean()

    dt_date_this = dt.timedelta(dts_retr_interp0.item()) + dtrefdate
    logging.info("date: %s", dt_date_this)

    # geomagnetic coordinates (IGRF)
    gmlats, gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=mean_alt_km)
    # gmlats, gmlons = apexpy.Apex(dt_date_this).geo2qd(sdd.lats, sdd.lons, mean_alt_km)
    sdd["gm_lats"] = gmlats
    sdd["gm_lons"] = gmlons
    logging.debug("geomag. lats: %s, lons: %s", sdd.gm_lats, sdd.gm_lons)
    # geomagnetic coordinates (AACGM 2005)
    aacgmgmlats, aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons)
    # aacgmgmlats, aacgmgmlons = aacgmv2.convert(sdd.lats, sdd.lons, mean_alt_km, dt_date_this)
    sdd["aacgm_gm_lats"] = ("lats", aacgmgmlats)
    sdd["aacgm_gm_lons"] = ("lats", aacgmgmlons)
    logging.debug("aacgm geomag. lats: %s, lons: %s",
            sdd.aacgm_gm_lats, sdd.aacgm_gm_lons)

    # current day for MSIS input
    f107_data = _read_gm(F107_FILE)
    f107a_data = _read_gm(F107A_FILE)
    ap_data = _read_gm(AP_FILE)
    msis_dtdate = dt.timedelta(dts_retr_interp0.item()) + dtrefdate
    msis_dtdate1 = msis_dtdate - dt.timedelta(days=1)
    msis_date = msis_dtdate.strftime("%Y-%m-%d").encode()
    msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode()
    # f10.7 is taken from the previous day, f10.7a and Ap from the current day
    msis_f107 = f107_data[msis_date1]
    msis_f107a = f107a_data[msis_date]
    msis_ap = ap_data[msis_date]
    logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s",
            msis_date, msis_f107a, msis_f107, msis_ap)

    # previous day for NOEM input
    f107_adj = _read_gm(F107_ADJ_FILE)
    kp_data = _read_gm(KP_FILE)
    noem_dtdate = dt.timedelta(dts_retr_interp0.item() - 1) + dtrefdate
    noem_date = noem_dtdate.strftime("%Y-%m-%d").encode()
    noem_f107 = f107_adj[noem_date]
    noem_kp = kp_data[noem_date]
    logging.debug("NOEM date: %s, f10.7: %s, kp: %s",
            noem_date, noem_f107, noem_kp)

    # make sure the model/diagnostic variables exist before filling them
    for var in ["noem_no"]:
        if var not in sdd.data_vars:
            sdd[var] = xr.zeros_like(sdd.densities)
    if "sza" not in sdd.data_vars:
        sdd["sza"] = xr.zeros_like(sdd.lats)
    if "akdiag" not in sdd.data_vars:
        # older retrieval versions: read the averaging kernel diagonal
        # from the separate AKM file
        sdd["akdiag"] = xr.full_like(sdd.densities, np.nan)
        #akm_filename = glob.glob('{0}_orbit_{1:05d}_*_AKM*'.format(species, orb))[0]
        akm_filename = glob.glob(
            "{0}/000NO_orbit_{1:05d}_*_AKM*"
            .format(dens_path, orbit))[0]
        logging.debug("ak file: %s", akm_filename)
        ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat)
        logging.debug("ak data: %s", ak)
        #ak1a = ak.sum(axis = 3)
        #dak1a = np.diagonal(ak1a, axis1=0, axis2=2)
        sdd["akdiag"] = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1)

    if msise is not None:
        # NRLMSISE-00 temperature and total density on the (lats, alts) grid
        _msis_d_t = msise(
            msis_dtdate,
            sdd.alts.values[None, :],
            sdd.lats.values[:, None],
            sdd.lons.values[:, None] % 360.,
            msis_f107a, msis_f107, msis_ap,
            lst=sdd.lst.values[:, None],
        )
        if "temperature" not in sdd.data_vars or use_msis:
            sdd["temperature"] = xr.zeros_like(sdd.densities)
            sdd.temperature[:] = _msis_d_t[:, :, -1]
        if "dens_tot" not in sdd.data_vars or use_msis:
            sdd["dens_tot"] = xr.zeros_like(sdd.densities)
            # sum the species number densities, skipping index 5
            # (the mass-density entry of the msise output)
            sdd.dens_tot[:] = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2)
    # NOEM NO densities, one latitude at a time; NaN when NOEM is unavailable
    for i, (lat, lon) in enumerate(
            zip(sdd.lats.values, sdd.lons.values)):
        if noem_cpp is not None:
            sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts,
                    [lat], [lon], noem_f107, noem_kp)[:]
        else:
            sdd.noem_no[i][:] = np.nan
    sdd.sza[:] = solar_zenith_angle(
        mean_alt_km,
        sdd.lats, sdd.lons,
        (pd.to_timedelta(sdd.utc_days.values, unit="days") + dtrefdate).to_pydatetime(),
    )
    sdd["vmr"] = sdd.densities / sdd.dens_tot * 1.e9  # ppb
    # drop unused variables
    sdd = sdd.drop(["alt_min", "alt", "alt_max", "lat_min", "lat", "lat_max"])
    # time and orbit
    sdd = sdd.expand_dims("time")
    sdd["time"] = ("time", [dts_retr_interp0])
    sdd["orbit"] = ("time", [orbit])
    return dts_retr_interp0, time0, lst0, lon0, sdd
|
449 | |||
450 | |||
def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"):
    """Find SCIAMACHY orbits with retrieved data at a date

    Parameters
    ----------
    date: str
        The date in the format "%Y-%m-%d".
    mlt: bool, optional
        Look for MLT mode data instead of nominal mode data.
        Increases the heuristics to find all MLT orbits.
    path: str, optional
        The path to the level 2 data. If `None` tries to infer
        the data directory from the L2 version using
        `./*<L2_version>`. Default: None
    L2_version: str, optional
        The level 2 data version, used to infer the data directory
        when `path` is `None`. Default: "v6.2"

    Returns
    -------
    orbits: list
        List of found orbits with retrieved data files
    """
    logging.debug("pre-processing: %s", date)
    if path is None:
        # infer the data directory from the level 2 version
        density_base = os.curdir
        path = "{0}/*{1}".format(density_base, L2_version)
        logging.debug("path: %s", path)

    dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format(
        path, date.replace("-", "")))
    # the orbit number is the third "_"-separated part of the file name
    orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles])
    # MLT heuristic: the scans continue into the following orbit, include it.
    # Guard against an empty list; previously this raised an IndexError
    # when no orbits were found for the date.
    if mlt and orbits:
        orbits.append(orbits[-1] + 1)
    return orbits
|
483 | |||
484 | |||
def combine_orbit_data(orbits,
        ref_date="2000-01-01",
        L2_version="v6.2",
        dens_path=None, spec_base=None,
        save_nc=False):
    """Combine post-processed SCIAMACHY retrieved orbit data

    Post-processes the given orbits one by one and concatenates the
    successfully processed ones along the "time" dimension.

    Parameters
    ----------
    orbits: list
        List of SCIAMACHY/Envisat orbit numbers to process.
    ref_date: str, optional
        Base date to calculate the relative days from,
        of the format "%Y-%m-%d". Default: 2000-01-01
    L2_version: str, optional
        SCIAMACHY level 2 data version to process
    dens_path: str, optional
        The path to the level 2 data. If `None` tries to infer
        the data directory from the L2 version looking for anything
        in the current directory that ends in <L2_version>: `./*<L2_version>`.
        Default: None
    spec_base: str, optional
        The root path to the level 1c spectra. Uses the current
        dir if not set or set to `None` (default).
    save_nc: bool, optional
        Save the intermediate orbit data sets to netcdf files
        for debugging.

    Returns
    -------
    sdday_ds: `xarray.Dataset` or None
        The per-orbit data concatenated along "time", or `None` if
        no orbit could be processed.
    """
    if dens_path is None:
        # try some heuristics
        dens_path = "{0}/*{1}".format(os.curdir, L2_version)

    processed = []
    for orbit in sorted(orbits):
        eq_date, eq_hour, eq_lst, eq_lon, sdens = process_orbit(
            orbit,
            ref_date=ref_date, dens_path=dens_path, spec_base=spec_base)
        logging.info(
            "orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s",
            orbit, eq_date, eq_hour, eq_lst, eq_lon
        )
        if sdens is None:
            continue
        processed.append(sdens)
        if save_nc:
            # NOTE(review): `process_orbit` does not set a `filename`
            # attribute on the dataset -- confirm this debug path works.
            sdens.to_netcdf(sdens.filename[:-3] + "nc")
    if not processed:
        return None
    return xr.concat(processed, dim="time")
|
539 | |||
540 | |||
# Per-file-version overrides for variable attributes, applied by
# `sddata_set_attrs` after the defaults have been set.
VAR_ATTRS = {
    "2.1": {
        "MSIS_Dens": dict(
            units='cm^{-3}',
            long_name='total number density (NRLMSIS-00)',
        ),
        "MSIS_Temp": dict(
            units='K',
            long_name='temperature',
            model="NRLMSIS-00",
        ),
    },
    "2.2": {
    },
    "2.3": {
        "aacgm_gm_lats": dict(
            long_name='geomagnetic_latitude',
            model='AACGM2005 at 80 km',
            units='degrees_north',
        ),
        "aacgm_gm_lons": dict(
            long_name='geomagnetic_longitude',
            model='AACGM2005 at 80 km',
            units='degrees_east',
        ),
        "orbit": dict(
            axis='T', calendar='standard',
            long_name='SCIAMACHY/Envisat orbit number',
            standard_name="orbit",
            units='1',
        ),
    },
}
# Per-file-version variable renames, applied after the attribute overrides.
VAR_RENAME = {
    "2.1": {
        # Rename to v2.1 variable names
        "MSIS_Dens": "TOT_DENS",
        "MSIS_Temp": "temperature",
    },
    "2.2": {
    },
    "2.3": {
    },
}
# Variables that could be stored as 32-bit floats on export
# (currently unused, see the commented-out loop in `sddata_set_attrs`).
FLOAT_VARS = [
    "altitude", "latitude", "longitude",
    "app_LST", "mean_LST", "mean_SZA",
    "aacgm_gm_lats", "aacgm_gm_lons",
    "gm_lats", "gm_lons",
]
||
591 | |||
592 | |||
def sddata_set_attrs(
        sdday_ds,
        file_version="2.2",
        ref_date="2000-01-01",
        rename=True,
        species="NO",
):
    """Customize xarray Dataset variables and attributes

    Changes the variable names to match those exported from the
    `scia_density_day` class.

    Parameters
    ----------
    sdday_ds: `xarray.Dataset` instance
        The combined dataset.
    file_version: string "major.minor", optional
        The netcdf file dataset version, determines some variable
        names and attributes.
    ref_date: str, optional
        Base date to calculate the relative days from,
        of the format "%Y-%m-%d". Default: 2000-01-01
    rename: bool, optional
        Rename the dataset variables to match the
        `scia_density_day` exported ones.
        Default: True
    species: str, optional
        The name of the level 2 species, used to prefix the
        dataset variables to be named <species>_<variable>.
        Default: "NO".

    Returns
    -------
    sdday_ds: `xarray.Dataset` instance
        The dataset with renamed variables and updated attributes.
    """
    if rename:
        sdday_ds = sdday_ds.rename({
            # 2d vars
            "akdiag": "{0}_AKDIAG".format(species),
            "apriori": "{0}_APRIORI".format(species),
            "densities": "{0}_DENS".format(species),
            "dens_err_meas": "{0}_ERR".format(species),
            "dens_err_tot": "{0}_ETOT".format(species),
            "dens_tot": "MSIS_Dens",
            "noem_no": "{0}_NOEM".format(species),
            "temperature": "MSIS_Temp",
            "vmr": "{0}_VMR".format(species),
            # 1d vars and dimensions
            "alts": "altitude",
            "lats": "latitude",
            "lons": "longitude",
            "lst": "app_LST",
            "mst": "mean_LST",
            "sza": "mean_SZA",
            "utc_hour": "UTC",
        })
    # relative standard deviation
    sdday_ds["{0}_RSTD".format(species)] = 100.0 * np.abs(
        sdday_ds["{0}_ERR".format(species)] / sdday_ds["{0}_DENS".format(species)])
    # fix coordinate attributes
    sdday_ds["time"].attrs = dict(axis='T', standard_name='time',
        calendar='standard', long_name='equatorial crossing time',
        units="days since {0}".format(
            pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")))
    sdday_ds["altitude"].attrs = dict(axis='Z', positive='up',
        long_name='altitude', standard_name='altitude', units='km')
    sdday_ds["latitude"].attrs = dict(axis='Y', long_name='latitude',
        standard_name='latitude', units='degrees_north')
    # Default variable attributes
    sdday_ds["{0}_DENS".format(species)].attrs = {
        "units": "cm^{-3}",
        "long_name": "{0} number density".format(species)}
    sdday_ds["{0}_ERR".format(species)].attrs = {
        "units": "cm^{-3}",
        "long_name": "{0} density measurement error".format(species)}
    sdday_ds["{0}_ETOT".format(species)].attrs = {
        "units": "cm^{-3}",
        "long_name": "{0} density total error".format(species)}
    sdday_ds["{0}_RSTD".format(species)].attrs = dict(
        units='%',
        long_name='{0} relative standard deviation'.format(species))
    sdday_ds["{0}_AKDIAG".format(species)].attrs = dict(
        units='1',
        long_name='{0} averaging kernel diagonal element'.format(species))
    sdday_ds["{0}_APRIORI".format(species)].attrs = dict(
        units='cm^{-3}', long_name='{0} apriori density'.format(species))
    sdday_ds["{0}_NOEM".format(species)].attrs = dict(
        units='cm^{-3}', long_name='NOEM {0} number density'.format(species))
    sdday_ds["{0}_VMR".format(species)].attrs = dict(
        units='ppb', long_name='{0} volume mixing ratio'.format(species))
    sdday_ds["MSIS_Dens"].attrs = dict(units='cm^{-3}',
        long_name='MSIS total number density',
        model="NRLMSIS-00")
    sdday_ds["MSIS_Temp"].attrs = dict(units='K',
        long_name='MSIS temperature',
        model="NRLMSIS-00")
    sdday_ds["longitude"].attrs = dict(long_name='longitude',
        standard_name='longitude', units='degrees_east')
    sdday_ds["app_LST"].attrs = dict(units='hours',
        long_name='apparent local solar time')
    sdday_ds["mean_LST"].attrs = dict(units='hours',
        long_name='mean local solar time')
    sdday_ds["mean_SZA"].attrs = dict(units='degrees',
        long_name='solar zenith angle at mean altitude')
    sdday_ds["UTC"].attrs = dict(units='hours',
        long_name='measurement utc time')
    sdday_ds["utc_days"].attrs = dict(
        units='days since {0}'.format(
            pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")),
        long_name='measurement utc day')
    sdday_ds["gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
        model='IGRF', units='degrees_north')
    sdday_ds["gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
        model='IGRF', units='degrees_east')
    sdday_ds["aacgm_gm_lats"].attrs = dict(long_name='geomagnetic_latitude',
        # model='AACGM2005 80 km',  # v2.3
        model='AACGM',  # v2.1, v2.2
        units='degrees_north')
    sdday_ds["aacgm_gm_lons"].attrs = dict(long_name='geomagnetic_longitude',
        # model='AACGM2005 80 km',  # v2.3
        model='AACGM',  # v2.1, v2.2
        units='degrees_east')
    sdday_ds["orbit"].attrs = dict(
        axis='T', calendar='standard',
        # long_name='SCIAMACHY/Envisat orbit number',  # v2.3
        long_name='orbit',  # v2.1, v2.2
        standard_name="orbit",
        # units='1',  # v2.3
        units='orbit number',  # v2.1, v2.2
    )
    # Overwrite version-specific variable attributes
    for _v, _a in VAR_ATTRS[file_version].items():
        sdday_ds[_v].attrs = _a
    if rename:
        # version specific renaming
        sdday_ds = sdday_ds.rename(VAR_RENAME[file_version])
    if int(file_version.split(".")[0]) < 3:
        # invert latitudes for backwards-compatibility
        sdday_ds = sdday_ds.sortby("latitude", ascending=False)
    else:
        sdday_ds = sdday_ds.sortby("latitude", ascending=True)

    # for var in FLOAT_VARS:
    #     _attrs = sdday_ds[var].attrs
    #     sdday_ds[var] = sdday_ds[var].astype('float32')
    #     sdday_ds[var].attrs = _attrs

    # decode the time coordinate only to log the processed date
    dateo = pd.to_datetime(
        xr.conventions.decode_cf(sdday_ds[["time"]]).time.data[0],
        utc=True,
    ).strftime("%Y-%m-%d")
    logging.debug("date %s dataset: %s", dateo, sdday_ds)
    return sdday_ds
|
742 | |||
743 | |||
def main():
	"""SCIAMACHY level 2 post processing

	Command-line entry point.  Parses the date/orbit selection options,
	collects the orbit numbers to process, combines the per-orbit
	retrieval data into one dataset, attaches the (version-specific)
	netcdf attributes, and writes the result to the output file.
	"""
	logging.basicConfig(level=logging.WARNING,
			format="[%(levelname)-8s] (%(asctime)s) "
				"%(filename)s:%(lineno)d %(message)s",
			datefmt="%Y-%m-%d %H:%M:%S %z")

	parser = ap.ArgumentParser()
	parser.add_argument("file", default="SCIA_NO.nc",
			help="the filename of the output netcdf file")
	parser.add_argument("-M", "--month", metavar="YEAR-MM",
			help="infer start and end dates for month")
	parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE",
			help="colon-separated start and end dates")
	parser.add_argument("-d", "--dates", help="comma-separated list of dates")
	parser.add_argument("-B", "--base_date",
			metavar="YEAR-MM-DD", default="2000-01-01",
			help="Reference date to base the time values (days) on "
			"(default: %(default)s).")
	parser.add_argument("-f", "--orbit_file",
			help="the file containing the input orbits")
	parser.add_argument("-r", "--retrieval_version", default="v6.2",
			help="SCIAMACHY level 2 data version to process")
	parser.add_argument("-R", "--file_version", default="2.2",
			help="Postprocessing format version of the output file")
	parser.add_argument("-A", "--author", default="unknown",
			help="Author of the post-processed data set "
			"(default: %(default)s)")
	parser.add_argument("-p", "--path", default=None,
			help="path containing the L2 data")
	parser.add_argument("-s", "--spectra", default=None, metavar="PATH",
			help="path containing the L1c spectra")
	parser.add_argument("-m", "--mlt", action="store_true", default=False,
			help="indicate nominal (False, default) or MLT data (True)")
	parser.add_argument("-X", "--xarray", action="store_true", default=False,
			help="DEPRECATED, kept for compatibility reasons, does nothing.")
	loglevels = parser.add_mutually_exclusive_group()
	loglevels.add_argument("-l", "--loglevel", default="WARNING",
			choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
			help="change the loglevel (default: 'WARNING')")
	loglevels.add_argument("-q", "--quiet", action="store_true", default=False,
			help="less output, same as --loglevel=ERROR (default: False)")
	loglevels.add_argument("-v", "--verbose", action="store_true", default=False,
			help="verbose output, same as --loglevel=INFO (default: False)")
	args = parser.parse_args()
	# -q/-v shortcuts override the explicit --loglevel choice
	if args.quiet:
		logging.getLogger().setLevel(logging.ERROR)
	elif args.verbose:
		logging.getLogger().setLevel(logging.INFO)
	else:
		logging.getLogger().setLevel(args.loglevel)

	logging.info("processing L2 version: %s", args.retrieval_version)
	logging.info("writing data file version: %s", args.file_version)

	# Collect the requested dates from all three (combinable) selectors
	pddrange = []
	if args.month is not None:
		d0 = pd.to_datetime(args.month + "-01", utc=True)
		pddrange.extend(pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd()))
	if args.date_range is not None:
		pddrange.extend(pd.date_range(*args.date_range.split(':')))
	if args.dates is not None:
		pddrange.extend(pd.to_datetime(args.dates.split(','), utc=True))
	logging.debug("pddrange: %s", pddrange)

	# Map dates to orbit numbers; an unparsable entry (e.g. NaT) raises
	# on .strftime() and is skipped instead of aborting the whole run.
	olist = []
	for date in pddrange:
		try:
			olist += get_orbits_from_date(date.strftime("%Y-%m-%d"),
					mlt=args.mlt, path=args.path, L2_version=args.retrieval_version)
		except Exception:  # was a bare `except:` to handle NaT; now logged
			logging.debug("skipping date: %s", date)
	if args.orbit_file is not None:
		olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist()
	logging.debug("olist: %s", olist)

	if not olist:
		# `logging.warn` is a deprecated alias of `logging.warning`
		logging.warning("No orbits to process.")
		return

	sd_xr = combine_orbit_data(olist,
			ref_date=args.base_date,
			L2_version=args.retrieval_version,
			dens_path=args.path, spec_base=args.spectra, save_nc=False)

	if sd_xr is None:
		logging.warning("Processed data is empty.")
		return

	sd_xr = sddata_set_attrs(sd_xr, ref_date=args.base_date, file_version=args.file_version)
	sd_xr = sd_xr[sorted(sd_xr.variables)]
	sd_xr.attrs["author"] = args.author
	# `datetime.utcnow()` is deprecated; the aware equivalent formats
	# identically with this format string (no %z used).
	sd_xr.attrs["creation_time"] = dt.datetime.now(dt.timezone.utc).strftime(
		"%a %b %d %Y %H:%M:%S +00:00 (UTC)")
	sd_xr.attrs["software"] = "sciapy {0}".format(__version__)
	sd_xr.attrs["L2_data_version"] = args.retrieval_version
	sd_xr.attrs["version"] = args.file_version
	logging.debug(sd_xr)
	sd_xr.to_netcdf(args.file, unlimited_dims=["time"])
844 | |||
845 | |||
# Script entry point: run the post-processing pipeline when executed
# directly (not on import).
if __name__ == "__main__":
	main()
848 |