| Total Complexity | 59 |
| Total Lines | 799 |
| Duplicated Lines | 82.6 % |
| Changes | 0 | ||
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like scia_post_process_l2 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #!/usr/bin/env python |
||
| 2 | # vim: set fileencoding=utf-8 |
||
| 3 | """SCIAMACHY level 2 data post processing |
||
| 4 | |||
| 5 | Main script for SCIAMACHY orbital retrieval post processing |
||
| 6 | and data combining (to netcdf). |
||
| 7 | |||
| 8 | Copyright (c) 2018 Stefan Bender |
||
| 9 | |||
| 10 | This file is part of sciapy. |
||
| 11 | sciapy is free software: you can redistribute it or modify it |
||
| 12 | under the terms of the GNU General Public License as published by |
||
| 13 | the Free Software Foundation, version 2. |
||
| 14 | See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html. |
||
| 15 | """ |
||
| 16 | |||
| 17 | from __future__ import absolute_import, division, print_function |
||
| 18 | |||
| 19 | import glob |
||
| 20 | import os |
||
| 21 | import argparse as ap |
||
| 22 | import datetime as dt |
||
| 23 | import logging |
||
| 24 | from pkg_resources import resource_filename |
||
| 25 | |||
| 26 | import numpy as np |
||
| 27 | import pandas as pd |
||
| 28 | import xarray as xr |
||
| 29 | from scipy.interpolate import interp1d |
||
| 30 | |||
| 31 | from astropy import units |
||
| 32 | from astropy.time import Time |
||
| 33 | import astropy.coordinates as coord |
||
| 34 | |||
| 35 | import sciapy.level1c as sl |
||
| 36 | from sciapy.level2 import density_pp as sd |
||
| 37 | from sciapy.level2 import scia_akm as sa |
||
| 38 | from sciapy.level2.igrf import gmag_igrf |
||
| 39 | from sciapy.level2.aacgm2005 import gmag_aacgm2005 |
||
| 40 | try: |
||
| 41 | from nrlmsise00 import msise_flat as msise |
||
| 42 | except ImportError: |
||
| 43 | msise = None |
||
| 44 | try: |
||
| 45 | from sciapy.level2.noem import noem_cpp |
||
| 46 | except ImportError: |
||
| 47 | noem_cpp = None |
||
| 48 | |||
| 49 | F107_FILE = resource_filename("sciapy", "data/indices/f107_noontime_flux_obs.txt") |
||
| 50 | F107A_FILE = resource_filename("sciapy", "data/indices/f107a_noontime_flux_obs.txt") |
||
| 51 | AP_FILE = resource_filename("sciapy", "data/indices/spidr_ap_2000-2012.dat") |
||
| 52 | F107_ADJ_FILE = resource_filename("sciapy", "data/indices/spidr_f107_2000-2012.dat") |
||
| 53 | KP_FILE = resource_filename("sciapy", "data/indices/spidr_kp_2000-2012.dat") |
||
| 54 | |||
| 55 | PHI_FAC = 11.91 |
||
| 56 | LST_FAC = -0.62 |
||
| 57 | |||
| 58 | |||
| 59 | View Code Duplication | def solar_zenith_angle(alt, lat, lon, time): |
|
|
|
|||
| 60 | atime = Time(time) |
||
| 61 | loc = coord.EarthLocation.from_geodetic( |
||
| 62 | height=alt * units.km, |
||
| 63 | lat=lat * units.deg, |
||
| 64 | lon=lon * units.deg, |
||
| 65 | ) |
||
| 66 | altaz = coord.AltAz(location=loc, obstime=atime) |
||
| 67 | sun = coord.get_sun(atime) |
||
| 68 | return sun.transform_to(altaz).zen.value |
||
| 69 | |||
| 70 | |||
| 71 | View Code Duplication | def read_spectra(year, orbit, spec_base=None, skip_upleg=True): |
|
| 72 | """Read and examine SCIAMACHY orbit spectra |
||
| 73 | |||
| 74 | Reads the limb spactra and extracts the dates, times, latitudes, |
||
| 75 | longitudes to be used to re-assess the retrieved geolocations. |
||
| 76 | |||
| 77 | Parameters |
||
| 78 | ---------- |
||
| 79 | year: int |
||
| 80 | The measurement year to select the corresponding subdir |
||
| 81 | below `spec_base` (see below). |
||
| 82 | orbit: int |
||
| 83 | SCIAMACHY/Envisat orbit number of the spectra. |
||
| 84 | spec_base: str, optional |
||
| 85 | The root path to the level 1c spectra. Uses the current |
||
| 86 | dir if not set or set to `None` (default). |
||
| 87 | skip_upleg: bool, optional |
||
| 88 | Skip upleg limb scans, i.e. night time scans. For NO retrievals, |
||
| 89 | those are not used and should be not used here. |
||
| 90 | Default: True |
||
| 91 | |||
| 92 | Returns |
||
| 93 | ------- |
||
| 94 | (dts, times, lats, lons, mlsts, alsts, eotcorr) |
||
| 95 | """ |
||
| 96 | fail = (None,) * 7 |
||
| 97 | if spec_base is None: |
||
| 98 | spec_base = os.curdir |
||
| 99 | spec_path = os.path.join(spec_base, "{0}".format(year)) |
||
| 100 | spec_path2 = os.path.join(spec_base, "{0}".format(int(year) + 1)) |
||
| 101 | logging.debug("spec_path: %s", spec_path) |
||
| 102 | logging.debug("spec_path2: %s", spec_path) |
||
| 103 | if not (os.path.isdir(spec_path) or os.path.isdir(spec_path2)): |
||
| 104 | return fail |
||
| 105 | |||
| 106 | # the star stands for the (optional) date subdir |
||
| 107 | # to find all spectra for the orbit |
||
| 108 | spfiles = glob.glob( |
||
| 109 | '{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary' |
||
| 110 | .format(spec_path, orbit)) |
||
| 111 | # sometimes for whatever reason the orbit ends up in the wrong year subdir |
||
| 112 | # looks in the subdir for the following year as well. |
||
| 113 | spfiles += glob.glob( |
||
| 114 | '{0}/*/SCIA_limb_*_1_0_{1:05d}.dat.l_mpl_binary' |
||
| 115 | .format(spec_path2, orbit)) |
||
| 116 | if len(spfiles) < 2: |
||
| 117 | return fail |
||
| 118 | |||
| 119 | dts = [] |
||
| 120 | times = [] |
||
| 121 | lats = [] |
||
| 122 | lons = [] |
||
| 123 | mlsts = [] |
||
| 124 | alsts = [] |
||
| 125 | |||
| 126 | sls = sl.scia_limb_scan() |
||
| 127 | for f in sorted(spfiles): |
||
| 128 | sls.read_from_file(f) |
||
| 129 | # copy the values from the l1c file |
||
| 130 | lat, lon = sls.cent_lat_lon[:2] |
||
| 131 | mlst, alst, eotcorr = sls.local_solar_time(False) |
||
| 132 | tp_lats = sls.limb_data.tp_lat |
||
| 133 | date = sls.date |
||
| 134 | # debug output if requested |
||
| 135 | logging.debug("file: %s", f) |
||
| 136 | logging.debug("lat: %s, lon: %s", lat, lon) |
||
| 137 | logging.debug("mlst: %s, alst: %s, eotcorr: %s", mlst, alst, eotcorr) |
||
| 138 | logging.debug("tp_lats: %s", tp_lats) |
||
| 139 | logging.debug("date: %s", date) |
||
| 140 | if skip_upleg and ((tp_lats[1] - tp_lats[-2]) < 0.5): |
||
| 141 | # Exclude non-downleg measurements where the latitude |
||
| 142 | # of the last real tangent point (the last is dark sky) |
||
| 143 | # is larger than or too close to the first latitude. |
||
| 144 | # Requires an (empirical) separation of +0.5 degree. |
||
| 145 | logging.debug("excluding upleg point at: %s, %s", lat, lon) |
||
| 146 | continue |
||
| 147 | dtdate = pd.to_datetime(dt.datetime(*date), utc=True) |
||
| 148 | time_hour = dtdate.hour + dtdate.minute / 60.0 + dtdate.second / 3600.0 |
||
| 149 | logging.debug("mean lst: %s, apparent lst: %s, EoT: %s", mlst, alst, eotcorr) |
||
| 150 | dts.append(dtdate) |
||
| 151 | times.append(time_hour) |
||
| 152 | lats.append(lat) |
||
| 153 | lons.append(lon) |
||
| 154 | mlsts.append(mlst) |
||
| 155 | alsts.append(alst) |
||
| 156 | |||
| 157 | if len(lats) < 2: |
||
| 158 | # interpolation will fail with less than 2 points |
||
| 159 | return fail |
||
| 160 | |||
| 161 | return (np.asarray(dts), |
||
| 162 | np.asarray(times), |
||
| 163 | np.asarray(lats), |
||
| 164 | np.asarray(lons), |
||
| 165 | np.asarray(mlsts), |
||
| 166 | np.asarray(alsts), eotcorr) |
||
| 167 | |||
| 168 | |||
| 169 | class _circ_interp(object): |
||
| 170 | """Interpolation on a circle""" |
||
| 171 | def __init__(self, x, y, **kw): |
||
| 172 | self.c_intpf = interp1d(x, np.cos(y), **kw) |
||
| 173 | self.s_intpf = interp1d(x, np.sin(y), **kw) |
||
| 174 | |||
| 175 | def __call__(self, x): |
||
| 176 | return np.arctan2(self.s_intpf(x), self.c_intpf(x)) |
||
| 177 | |||
| 178 | |||
| 179 | View Code Duplication | def process_orbit( |
|
| 180 | orbit, |
||
| 181 | ref_date="2000-01-01", |
||
| 182 | dens_path=None, |
||
| 183 | spec_base=None, |
||
| 184 | use_msis=True, |
||
| 185 | ): |
||
| 186 | """Post process retrieved SCIAMACHY orbit |
||
| 187 | |||
| 188 | Parameters |
||
| 189 | ---------- |
||
| 190 | orbit: int |
||
| 191 | SCIAMACHY/Envisat orbit number of the results to process. |
||
| 192 | ref_date: str, optional |
||
| 193 | Base date to calculate the relative days from, |
||
| 194 | of the format "%Y-%m-%d". Default: 2000-01-01 |
||
| 195 | dens_path: str, optional |
||
| 196 | The path to the level 2 data. Uses the current |
||
| 197 | dir if not set or set to `None` (default). |
||
| 198 | spec_base: str, optional |
||
| 199 | The root path to the level 1c spectra. Uses the current |
||
| 200 | dir if not set or set to `None` (default). |
||
| 201 | |||
| 202 | Returns |
||
| 203 | ------- |
||
| 204 | (dts0, time0, lst0, lon0, sdd): tuple |
||
| 205 | dts0 - days since ref_date at equator crossing (float) |
||
| 206 | time0 - utc hour into the day at equator crossing (float) |
||
| 207 | lst0 - apparent local solar time at the equator (float) |
||
| 208 | lon0 - longitude of the equator crossing (float) |
||
| 209 | sdd - `scia_density_pp` instance of the post-processed data |
||
| 210 | """ |
||
| 211 | def _read_gm(fname): |
||
| 212 | return dict(np.genfromtxt(fname, usecols=[0, 2], dtype=None)) |
||
| 213 | |||
| 214 | fail = (None,) * 5 |
||
| 215 | logging.debug("processing orbit: %s", orbit) |
||
| 216 | dtrefdate = pd.to_datetime(ref_date, format="%Y-%m-%d", utc=True) |
||
| 217 | |||
| 218 | dfiles = glob.glob( |
||
| 219 | "{0}/000NO_orbit_{1:05d}_*_Dichten.txt" |
||
| 220 | .format(dens_path, orbit)) |
||
| 221 | if len(dfiles) < 1: |
||
| 222 | return fail |
||
| 223 | logging.debug("dfiles: %s", dfiles) |
||
| 224 | logging.debug("splits: %s", [fn.split('/') for fn in dfiles]) |
||
| 225 | ddict = dict([(fn, (fn.split('/')[-3:-1] + fn.split('/')[-1].split('_')[3:4])) |
||
| 226 | for fn in dfiles]) |
||
| 227 | logging.debug("ddict: %s", ddict) |
||
| 228 | year = ddict[sorted(ddict.keys())[0]][-1][:4] |
||
| 229 | logging.debug("year: %s", year) |
||
| 230 | |||
| 231 | dts, times, lats, lons, mlsts, alsts, eotcorr = \ |
||
| 232 | read_spectra(year, orbit, spec_base) |
||
| 233 | if dts is None: |
||
| 234 | # return early if reading the spectra failed |
||
| 235 | return fail |
||
| 236 | |||
| 237 | dts = pd.to_datetime(dts, utc=True) - dtrefdate |
||
| 238 | dts = np.array([dtd.days + dtd.seconds / 86400. for dtd in dts]) |
||
| 239 | logging.debug("lats: %s, lons: %s, times: %s", lats, lons, times) |
||
| 240 | |||
| 241 | sdd = sd.scia_densities_pp(ref_date=ref_date) |
||
| 242 | sdd.read_from_file(dfiles[0]) |
||
| 243 | logging.debug("density lats: %s, lons: %s", sdd.lats, sdd.lons) |
||
| 244 | |||
| 245 | # Re-interpolates the location (longitude) and times from the |
||
| 246 | # limb scan spectra files along the orbit to determine the values |
||
| 247 | # at the Equator and to fill in possibly missing data. |
||
| 248 | # |
||
| 249 | # y values are unit circle angles in radians (0 < φ < 2π or -π < φ < π) |
||
| 250 | # longitudes |
||
| 251 | lons_intpf = _circ_interp( |
||
| 252 | lats[::-1], np.radians(lons[::-1]), |
||
| 253 | fill_value="extrapolate", |
||
| 254 | ) |
||
| 255 | # apparent local solar time (EoT corrected) |
||
| 256 | lst_intpf = _circ_interp( |
||
| 257 | lats[::-1], np.pi / 12. * alsts[::-1], |
||
| 258 | fill_value="extrapolate", |
||
| 259 | ) |
||
| 260 | # mean local solar time |
||
| 261 | mst_intpf = _circ_interp( |
||
| 262 | lats[::-1], np.pi / 12. * mlsts[::-1], |
||
| 263 | fill_value="extrapolate", |
||
| 264 | ) |
||
| 265 | # utc time (day) |
||
| 266 | time_intpf = _circ_interp( |
||
| 267 | lats[::-1], np.pi / 12. * times[::-1], |
||
| 268 | fill_value="extrapolate", |
||
| 269 | ) |
||
| 270 | # datetime |
||
| 271 | dts_retr_interpf = interp1d(lats[::-1], dts[::-1], fill_value="extrapolate") |
||
| 272 | |||
| 273 | # equator values |
||
| 274 | lon0 = np.degrees(lons_intpf(0.)) % 360. |
||
| 275 | lst0 = (lst_intpf(0.) * 12. / np.pi) % 24. |
||
| 276 | mst0 = (mst_intpf(0.) * 12. / np.pi) % 24. |
||
| 277 | time0 = (time_intpf(0.) * 12. / np.pi) % 24. |
||
| 278 | dts_retr_interp0 = dts_retr_interpf(0.) |
||
| 279 | logging.debug("utc day at equator: %s", dts_retr_interp0) |
||
| 280 | logging.debug("mean LST at equator: %s, apparent LST at equator: %s", mst0, lst0) |
||
| 281 | |||
| 282 | sdd.utchour = (time_intpf(sdd.lats) * 12. / np.pi) % 24. |
||
| 283 | sdd.utcdays = dts_retr_interpf(sdd.lats) |
||
| 284 | |||
| 285 | if sdd.lons is None: |
||
| 286 | # recalculate the longitudes |
||
| 287 | # estimate the equatorial longitude from the |
||
| 288 | # limb scan latitudes and longitudes |
||
| 289 | lon0s_tp = lons - PHI_FAC * np.tan(np.radians(lats)) |
||
| 290 | clon0s_tp = np.cos(np.radians(lon0s_tp)) |
||
| 291 | slon0s_tp = np.sin(np.radians(lon0s_tp)) |
||
| 292 | lon0_tp = np.arctan2(np.sum(slon0s_tp[1:-1]), np.sum(clon0s_tp[1:-1])) |
||
| 293 | lon0_tp = np.degrees((lon0_tp + 2. * np.pi) % (2. * np.pi)) |
||
| 294 | logging.info("lon0: %s", lon0) |
||
| 295 | logging.info("lon0 tp: %s", lon0_tp) |
||
| 296 | # interpolate to the retrieval latitudes |
||
| 297 | tg_retr_lats = np.tan(np.radians(sdd.lats)) |
||
| 298 | calc_lons = (tg_retr_lats * PHI_FAC + lon0) % 360. |
||
| 299 | calc_lons_tp = (tg_retr_lats * PHI_FAC + lon0_tp) % 360. |
||
| 300 | sdd.lons = calc_lons_tp |
||
| 301 | logging.debug("(calculated) retrieval lons: %s, %s", |
||
| 302 | calc_lons, calc_lons_tp) |
||
| 303 | else: |
||
| 304 | # sdd.lons = sdd.lons % 360. |
||
| 305 | logging.debug("(original) retrieval lons: %s", sdd.lons) |
||
| 306 | |||
| 307 | sdd.mst = (sdd.utchour + sdd.lons / 15.) % 24. |
||
| 308 | sdd.lst = sdd.mst + eotcorr / 60. |
||
| 309 | mean_alt_km = sdd.alts.mean() |
||
| 310 | |||
| 311 | dt_date_this = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate |
||
| 312 | logging.info("date: %s", dt_date_this) |
||
| 313 | # caclulate geomagnetic coordinates |
||
| 314 | sdd.gmlats, sdd.gmlons = gmag_igrf(dt_date_this, sdd.lats, sdd.lons, alt=mean_alt_km) |
||
| 315 | logging.debug("geomag. lats: %s, lons: %s", sdd.gmlats, sdd.gmlons) |
||
| 316 | sdd.aacgmgmlats, sdd.aacgmgmlons = gmag_aacgm2005(sdd.lats, sdd.lons) |
||
| 317 | logging.debug("aacgm geomag. lats: %s, lons: %s", |
||
| 318 | sdd.aacgmgmlats, sdd.aacgmgmlons) |
||
| 319 | |||
| 320 | # current day for MSIS input |
||
| 321 | f107_data = _read_gm(F107_FILE) |
||
| 322 | f107a_data = _read_gm(F107A_FILE) |
||
| 323 | ap_data = _read_gm(AP_FILE) |
||
| 324 | msis_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0)) + dtrefdate |
||
| 325 | msis_dtdate1 = msis_dtdate - dt.timedelta(days=1) |
||
| 326 | msis_date = msis_dtdate.strftime("%Y-%m-%d").encode() |
||
| 327 | msis_date1 = msis_dtdate1.strftime("%Y-%m-%d").encode() |
||
| 328 | msis_f107 = f107_data[msis_date1] |
||
| 329 | msis_f107a = f107a_data[msis_date] |
||
| 330 | msis_ap = ap_data[msis_date] |
||
| 331 | logging.debug("MSIS date: %s, f10.7a: %s, f10.7: %s, ap: %s", |
||
| 332 | msis_date, msis_f107a, msis_f107, msis_ap) |
||
| 333 | |||
| 334 | # previous day for NOEM input |
||
| 335 | f107_adj = _read_gm(F107_ADJ_FILE) |
||
| 336 | kp_data = _read_gm(KP_FILE) |
||
| 337 | noem_dtdate = dt.timedelta(np.asscalar(dts_retr_interp0) - 1) + dtrefdate |
||
| 338 | noem_date = noem_dtdate.strftime("%Y-%m-%d").encode() |
||
| 339 | noem_f107 = f107_adj[noem_date] |
||
| 340 | noem_kp = kp_data[noem_date] |
||
| 341 | logging.debug("NOEM date: %s, f10.7: %s, kp: %s", |
||
| 342 | noem_date, noem_f107, noem_kp) |
||
| 343 | |||
| 344 | if sdd.noem_no is None: |
||
| 345 | sdd.noem_no = np.zeros_like(sdd.densities) |
||
| 346 | if sdd.temperature is None and msise is None: |
||
| 347 | sdd.temperature = np.full_like(sdd.densities, np.nan) |
||
| 348 | if sdd.sza is None: |
||
| 349 | sdd.sza = np.zeros_like(sdd.lats) |
||
| 350 | if sdd.akdiag is None: |
||
| 351 | sdd.akdiag = np.zeros_like(sdd.densities) |
||
| 352 | akm_filename = glob.glob( |
||
| 353 | "{0}/000NO_orbit_{1:05d}_*_AKM*" |
||
| 354 | .format(dens_path, orbit))[0] |
||
| 355 | logging.debug("ak file: %s", akm_filename) |
||
| 356 | ak = sa.read_akm(akm_filename, sdd.nalt, sdd.nlat) |
||
| 357 | logging.debug("ak data: %s", ak) |
||
| 358 | sdd.akdiag = ak.diagonal(axis1=1, axis2=3).diagonal(axis1=0, axis2=1) |
||
| 359 | |||
| 360 | if msise is not None: |
||
| 361 | if sdd.temperature is None or use_msis: |
||
| 362 | _msis_d_t = msise( |
||
| 363 | msis_dtdate, |
||
| 364 | sdd.alts[None, :], sdd.lats[:, None], sdd.lons[:, None] % 360., |
||
| 365 | msis_f107a, msis_f107, msis_ap, |
||
| 366 | lst=sdd.lst[:, None], |
||
| 367 | ) |
||
| 368 | sdd.temperature = _msis_d_t[:, :, -1] |
||
| 369 | if use_msis: |
||
| 370 | sdd.dens_tot = np.sum(_msis_d_t[:, :, np.r_[:5, 6:9]], axis=2) |
||
| 371 | for i, lat in enumerate(sdd.lats): |
||
| 372 | if noem_cpp is not None: |
||
| 373 | sdd.noem_no[i] = noem_cpp(noem_date.decode(), sdd.alts, |
||
| 374 | [lat], [sdd.lons[i]], noem_f107, noem_kp)[:] |
||
| 375 | else: |
||
| 376 | sdd.noem_no[i][:] = np.nan |
||
| 377 | sdd.sza[:] = solar_zenith_angle( |
||
| 378 | mean_alt_km, |
||
| 379 | sdd.lats, sdd.lons, |
||
| 380 | (pd.to_timedelta(sdd.utcdays, unit="days") + dtrefdate).to_pydatetime(), |
||
| 381 | ) |
||
| 382 | sdd.vmr = sdd.densities / sdd.dens_tot * 1.e9 # ppb |
||
| 383 | return dts_retr_interp0, time0, lst0, lon0, sdd |
||
| 384 | |||
| 385 | |||
| 386 | View Code Duplication | def get_orbits_from_date(date, mlt=False, path=None, L2_version="v6.2"): |
|
| 387 | """Find SCIAMACHY orbits with retrieved data at a date |
||
| 388 | |||
| 389 | Parameters |
||
| 390 | ---------- |
||
| 391 | date: str |
||
| 392 | The date in the format "%Y-%m-%d". |
||
| 393 | mlt: bool, optional |
||
| 394 | Look for MLT mode data instead of nominal mode data. |
||
| 395 | Increases the heuristics to find all MLT orbits. |
||
| 396 | path: str, optional |
||
| 397 | The path to the level 2 data. If `None` tries to infer |
||
| 398 | the data directory from the L2 version using |
||
| 399 | './*<L2_version>'. Default: None |
||
| 400 | |||
| 401 | Returns |
||
| 402 | ------- |
||
| 403 | orbits: list |
||
| 404 | List of found orbits with retrieved data files |
||
| 405 | """ |
||
| 406 | logging.debug("pre-processing: %s", date) |
||
| 407 | if path is None: |
||
| 408 | density_base = os.curdir |
||
| 409 | path = "{0}/*{1}".format(density_base, L2_version) |
||
| 410 | logging.debug("path: %s", path) |
||
| 411 | |||
| 412 | dfiles = glob.glob("{0}/000NO_orbit_*_{1}_Dichten.txt".format( |
||
| 413 | path, date.replace("-", ""))) |
||
| 414 | orbits = sorted([int(os.path.basename(df).split('_')[2]) for df in dfiles]) |
||
| 415 | if mlt: |
||
| 416 | orbits.append(orbits[-1] + 1) |
||
| 417 | return orbits |
||
| 418 | |||
| 419 | |||
| 420 | View Code Duplication | def combine_orbit_data(orbits, |
|
| 421 | ref_date="2000-01-01", |
||
| 422 | L2_version="v6.2", file_version="2.2", |
||
| 423 | dens_path=None, spec_base=None, |
||
| 424 | use_xarray=False, save_nc=False): |
||
| 425 | """Combine post-processed SCIAMACHY retrieved orbit data |
||
| 426 | |||
| 427 | Parameters |
||
| 428 | ---------- |
||
| 429 | orbits: list |
||
| 430 | List of SCIAMACHY/Envisat orbit numbers to process. |
||
| 431 | ref_date: str, optional |
||
| 432 | Base date to calculate the relative days from, |
||
| 433 | of the format "%Y-%m-%d". Default: 2000-01-01 |
||
| 434 | L2_version: str, optional |
||
| 435 | SCIAMACHY level 2 data version to process |
||
| 436 | file_version: str, optional |
||
| 437 | Postprocessing format version of the output data |
||
| 438 | dens_path: str, optional |
||
| 439 | The path to the level 2 data. If `None` tries to infer |
||
| 440 | the data directory from the L2 version looking for anything |
||
| 441 | in the current directory that ends in <L2_version>: './*<L2_version>'. |
||
| 442 | Default: None |
||
| 443 | spec_base: str, optional |
||
| 444 | The root path to the level 1c spectra. Uses the current |
||
| 445 | dir if not set or set to `None` (default). |
||
| 446 | use_xarray: bool, optional |
||
| 447 | Uses xarray (if available) to combine the orbital data. |
||
| 448 | save_nc: bool, optional |
||
| 449 | Save the intermediate orbit data sets to netcdf files |
||
| 450 | for debugging. |
||
| 451 | |||
| 452 | Returns |
||
| 453 | ------- |
||
| 454 | (sdday, sdday_ds): tuple |
||
| 455 | `sdday` contains the combined data as a `scia_density_day` instance, |
||
| 456 | `sdday_ds` contains the same data as a `xarray.Dataset`. |
||
| 457 | """ |
||
| 458 | if dens_path is None: |
||
| 459 | # try some heuristics |
||
| 460 | density_base = os.curdir |
||
| 461 | dens_path = "{0}/*{1}".format(density_base, L2_version) |
||
| 462 | |||
| 463 | sdday = sd.scia_density_day(ref_date=ref_date) |
||
| 464 | sddayl = [] |
||
| 465 | sdday_ds = None |
||
| 466 | for orbit in sorted(orbits): |
||
| 467 | dateo, timeo, lsto, lono, sdens = process_orbit(orbit, |
||
| 468 | ref_date=ref_date, dens_path=dens_path, spec_base=spec_base) |
||
| 469 | logging.info( |
||
| 470 | "orbit: %s, eq. date: %s, eq. hour: %s, eq. app. lst: %s, eq. lon: %s", |
||
| 471 | orbit, dateo, timeo, lsto, lono |
||
| 472 | ) |
||
| 473 | if sdens is not None: |
||
| 474 | sdens.version = file_version |
||
| 475 | sdens.data_version = L2_version |
||
| 476 | sdday.append_data(dateo, orbit, timeo, sdens) |
||
| 477 | if use_xarray: |
||
| 478 | sd_xr = sdens.to_xarray(dateo, orbit) |
||
| 479 | if sd_xr is not None: |
||
| 480 | logging.debug("orbit %s dataset: %s", orbit, sd_xr) |
||
| 481 | sddayl.append(sd_xr) |
||
| 482 | if save_nc: |
||
| 483 | sdens.write_to_netcdf(sdens.filename[:-3] + "nc") |
||
| 484 | if use_xarray and sddayl: |
||
| 485 | sdday_ds = xr.concat(sddayl, dim="time") |
||
| 486 | return sdday, sdday_ds |
||
| 487 | |||
| 488 | |||
| 489 | VAR_ATTRS = { |
||
| 490 | "2.1": { |
||
| 491 | "MSIS_Dens": dict( |
||
| 492 | units='cm^{-3}', |
||
| 493 | long_name='total number density (NRLMSIS-00)', |
||
| 494 | ), |
||
| 495 | "MSIS_Temp": dict( |
||
| 496 | units='K', |
||
| 497 | long_name='temperature', |
||
| 498 | model="NRLMSIS-00", |
||
| 499 | ), |
||
| 500 | }, |
||
| 501 | "2.2": { |
||
| 502 | }, |
||
| 503 | "2.3": { |
||
| 504 | "aacgm_gm_lats": dict( |
||
| 505 | long_name='geomagnetic_latitude', |
||
| 506 | model='AACGM2005 at 80 km', |
||
| 507 | units='degrees_north', |
||
| 508 | ), |
||
| 509 | "aacgm_gm_lons": dict( |
||
| 510 | long_name='geomagnetic_longitude', |
||
| 511 | model='AACGM2005 at 80 km', |
||
| 512 | units='degrees_east', |
||
| 513 | ), |
||
| 514 | "orbit": dict( |
||
| 515 | axis='T', calendar='standard', |
||
| 516 | long_name='SCIAMACHY/Envisat orbit number', |
||
| 517 | standard_name="orbit", |
||
| 518 | units='1', |
||
| 519 | ), |
||
| 520 | }, |
||
| 521 | } |
||
| 522 | VAR_RENAME = { |
||
| 523 | "2.1": { |
||
| 524 | # Rename to v2.1 variable names |
||
| 525 | "MSIS_Dens": "TOT_DENS", |
||
| 526 | "MSIS_Temp": "temperature", |
||
| 527 | }, |
||
| 528 | "2.2": { |
||
| 529 | }, |
||
| 530 | "2.3": { |
||
| 531 | }, |
||
| 532 | } |
||
| 533 | FLOAT_VARS = [ |
||
| 534 | "altitude", "latitude", "longitude", |
||
| 535 | "app_LST", "mean_LST", "mean_SZA", |
||
| 536 | "aacgm_gm_lats", "aacgm_gm_lons", |
||
| 537 | "gm_lats", "gm_lons", |
||
| 538 | ] |
||
| 539 | |||
| 540 | |||
| 541 | View Code Duplication | def sddata_xr_set_attrs( |
|
| 542 | sdday_xr, |
||
| 543 | file_version="2.2", |
||
| 544 | ref_date="2000-01-01", |
||
| 545 | rename=True, |
||
| 546 | species="NO", |
||
| 547 | ): |
||
| 548 | """Customize xarray Dataset variables and attributes |
||
| 549 | |||
| 550 | Changes the variable names to match those exported from the |
||
| 551 | `scia_density_day` class. |
||
| 552 | |||
| 553 | Parameters |
||
| 554 | ---------- |
||
| 555 | sdday_xr: `xarray.Dataset` instance |
||
| 556 | The combined dataset. |
||
| 557 | file_version: string "major.minor", optional |
||
| 558 | The netcdf file datase version, determines some variable |
||
| 559 | names and attributes. |
||
| 560 | ref_date: str, optional |
||
| 561 | Base date to calculate the relative days from, |
||
| 562 | of the format "%Y-%m-%d". Default: 2000-01-01 |
||
| 563 | rename: bool, optional |
||
| 564 | Rename the dataset variables to match the |
||
| 565 | `scia_density_day` exported ones. |
||
| 566 | Default: True |
||
| 567 | species: str, optional |
||
| 568 | The name of the level 2 species, used to prefix the |
||
| 569 | dataset variables to be named <species>_<variable>. |
||
| 570 | Default: "NO". |
||
| 571 | """ |
||
| 572 | if rename: |
||
| 573 | sdday_xr = sdday_xr.rename({ |
||
| 574 | # 2d vars |
||
| 575 | "akm_diagonal": "{0}_AKDIAG".format(species), |
||
| 576 | "apriori": "{0}_APRIORI".format(species), |
||
| 577 | "density": "{0}_DENS".format(species), |
||
| 578 | "density_air": "MSIS_Dens", |
||
| 579 | "error_meas": "{0}_ERR".format(species), |
||
| 580 | "error_tot": "{0}_ETOT".format(species), |
||
| 581 | "temperature": "MSIS_Temp", |
||
| 582 | "NOEM_density": "{0}_NOEM".format(species), |
||
| 583 | "VMR": "{0}_VMR".format(species), |
||
| 584 | # 1d vars and dimensions |
||
| 585 | "app_lst": "app_LST", |
||
| 586 | "mean_lst": "mean_LST", |
||
| 587 | "mean_sza": "mean_SZA", |
||
| 588 | "utc_hour": "UTC", |
||
| 589 | }) |
||
| 590 | # relative standard deviation |
||
| 591 | sdday_xr["{0}_RSTD".format(species)] = 100.0 * np.abs( |
||
| 592 | sdday_xr["{0}_ERR".format(species)] / sdday_xr["{0}_DENS".format(species)]) |
||
| 593 | # fix coordinate attributes |
||
| 594 | sdday_xr["time"].attrs = dict(axis='T', standard_name='time', |
||
| 595 | calendar='standard', long_name='equatorial crossing time', |
||
| 596 | units="days since {0}".format( |
||
| 597 | pd.to_datetime(ref_date, utc=True).isoformat(sep=" "))) |
||
| 598 | sdday_xr["altitude"].attrs = dict(axis='Z', positive='up', |
||
| 599 | long_name='altitude', standard_name='altitude', units='km') |
||
| 600 | sdday_xr["latitude"].attrs = dict(axis='Y', long_name='latitude', |
||
| 601 | standard_name='latitude', units='degrees_north') |
||
| 602 | # Default variable attributes |
||
| 603 | sdday_xr["{0}_DENS".format(species)].attrs = { |
||
| 604 | "units": "cm^{-3}", |
||
| 605 | "long_name": "{0} number density".format(species)} |
||
| 606 | sdday_xr["{0}_ERR".format(species)].attrs = { |
||
| 607 | "units": "cm^{-3}", |
||
| 608 | "long_name": "{0} density measurement error".format(species)} |
||
| 609 | sdday_xr["{0}_ETOT".format(species)].attrs = { |
||
| 610 | "units": "cm^{-3}", |
||
| 611 | "long_name": "{0} density total error".format(species)} |
||
| 612 | sdday_xr["{0}_RSTD".format(species)].attrs = dict( |
||
| 613 | units='%', |
||
| 614 | long_name='{0} relative standard deviation'.format(species)) |
||
| 615 | sdday_xr["{0}_AKDIAG".format(species)].attrs = dict( |
||
| 616 | units='1', |
||
| 617 | long_name='{0} averaging kernel diagonal element'.format(species)) |
||
| 618 | sdday_xr["{0}_APRIORI".format(species)].attrs = dict( |
||
| 619 | units='cm^{-3}', long_name='{0} apriori density'.format(species)) |
||
| 620 | sdday_xr["{0}_NOEM".format(species)].attrs = dict( |
||
| 621 | units='cm^{-3}', long_name='NOEM {0} number density'.format(species)) |
||
| 622 | sdday_xr["{0}_VMR".format(species)].attrs = dict( |
||
| 623 | units='ppb', long_name='{0} volume mixing ratio'.format(species)) |
||
| 624 | sdday_xr["MSIS_Dens"].attrs = dict(units='cm^{-3}', |
||
| 625 | long_name='MSIS total number density', |
||
| 626 | model="NRLMSIS-00") |
||
| 627 | sdday_xr["MSIS_Temp"].attrs = dict(units='K', |
||
| 628 | long_name='MSIS temperature', |
||
| 629 | model="NRLMSIS-00") |
||
| 630 | sdday_xr["longitude"].attrs = dict(long_name='longitude', |
||
| 631 | standard_name='longitude', units='degrees_east') |
||
| 632 | sdday_xr["app_LST"].attrs = dict(units='hours', |
||
| 633 | long_name='apparent local solar time') |
||
| 634 | sdday_xr["mean_LST"].attrs = dict(units='hours', |
||
| 635 | long_name='mean local solar time') |
||
| 636 | sdday_xr["mean_SZA"].attrs = dict(units='degrees', |
||
| 637 | long_name='solar zenith angle at mean altitude') |
||
| 638 | sdday_xr["UTC"].attrs = dict(units='hours', |
||
| 639 | long_name='measurement utc time') |
||
| 640 | sdday_xr["utc_days"].attrs = dict( |
||
| 641 | units='days since {0}'.format( |
||
| 642 | pd.to_datetime(ref_date, utc=True).isoformat(sep=" ")), |
||
| 643 | long_name='measurement utc day') |
||
| 644 | sdday_xr["gm_lats"].attrs = dict(long_name='geomagnetic_latitude', |
||
| 645 | model='IGRF', units='degrees_north') |
||
| 646 | sdday_xr["gm_lons"].attrs = dict(long_name='geomagnetic_longitude', |
||
| 647 | model='IGRF', units='degrees_east') |
||
| 648 | sdday_xr["aacgm_gm_lats"].attrs = dict(long_name='geomagnetic_latitude', |
||
| 649 | # model='AACGM2005 80 km', # v2.3 |
||
| 650 | model='AACGM', # v2.1, v2.2 |
||
| 651 | units='degrees_north') |
||
| 652 | sdday_xr["aacgm_gm_lons"].attrs = dict(long_name='geomagnetic_longitude', |
||
| 653 | # model='AACGM2005 80 km', # v2.3 |
||
| 654 | model='AACGM', # v2.1, v2.2 |
||
| 655 | units='degrees_east') |
||
| 656 | sdday_xr["orbit"].attrs = dict( |
||
| 657 | axis='T', calendar='standard', |
||
| 658 | # long_name='SCIAMACHY/Envisat orbit number', # v2.3 |
||
| 659 | long_name='orbit', # v2.1, v2.2 |
||
| 660 | standard_name="orbit", |
||
| 661 | # units='1', # v2.3 |
||
| 662 | units='orbit number', # v2.1, v2.2 |
||
| 663 | ) |
||
| 664 | # Overwrite version-specific variable attributes |
||
| 665 | for _v, _a in VAR_ATTRS[file_version].items(): |
||
| 666 | sdday_xr[_v].attrs = _a |
||
| 667 | if rename: |
||
| 668 | # version specific renaming |
||
| 669 | sdday_xr = sdday_xr.rename(VAR_RENAME[file_version]) |
||
| 670 | if int(file_version.split(".")[0]) < 3: |
||
| 671 | # invert latitudes for backwards-compatitbility |
||
| 672 | sdday_xr = sdday_xr.sortby("latitude", ascending=False) |
||
| 673 | else: |
||
| 674 | sdday_xr = sdday_xr.sortby("latitude", ascending=True) |
||
| 675 | |||
| 676 | dateo = pd.to_datetime( |
||
| 677 | xr.conventions.decode_cf_variable("date", sdday_xr.time).data[0], |
||
| 678 | utc=True, |
||
| 679 | ).strftime("%Y-%m-%d") |
||
| 680 | logging.debug("date %s dataset: %s", dateo, sdday_xr) |
||
| 681 | return sdday_xr |
||
| 682 | |||
| 683 | |||
| 684 | View Code Duplication | def main(): |
|
| 685 | logging.basicConfig(level=logging.WARNING, |
||
| 686 | format="[%(levelname)-8s] (%(asctime)s) " |
||
| 687 | "%(filename)s:%(lineno)d %(message)s", |
||
| 688 | datefmt="%Y-%m-%d %H:%M:%S %z") |
||
| 689 | |||
| 690 | parser = ap.ArgumentParser() |
||
| 691 | parser.add_argument("file", default="SCIA_NO.nc", |
||
| 692 | help="the filename of the output netcdf file") |
||
| 693 | parser.add_argument("-M", "--month", metavar="YEAR-MM", |
||
| 694 | help="infer start and end dates for month") |
||
| 695 | parser.add_argument("-D", "--date_range", metavar="START_DATE:END_DATE", |
||
| 696 | help="colon-separated start and end dates") |
||
| 697 | parser.add_argument("-d", "--dates", help="comma-separated list of dates") |
||
| 698 | parser.add_argument("-B", "--base_date", |
||
| 699 | metavar="YEAR-MM-DD", default="2000-01-01", |
||
| 700 | help="Reference date to base the time values (days) on " |
||
| 701 | "(default: %(default)s).") |
||
| 702 | parser.add_argument("-f", "--orbit_file", |
||
| 703 | help="the file containing the input orbits") |
||
| 704 | parser.add_argument("-r", "--retrieval_version", default="v6.2", |
||
| 705 | help="SCIAMACHY level 2 data version to process") |
||
| 706 | parser.add_argument("-R", "--file_version", default="2.2", |
||
| 707 | help="Postprocessing format version of the output file") |
||
| 708 | parser.add_argument("-A", "--author", default="unknown", |
||
| 709 | help="Author of the post-processed data set " |
||
| 710 | "(default: %(default)s)") |
||
| 711 | parser.add_argument("-p", "--path", default=None, |
||
| 712 | help="path containing the L2 data") |
||
| 713 | parser.add_argument("-s", "--spectra", default=None, metavar="PATH", |
||
| 714 | help="path containing the L1c spectra") |
||
| 715 | parser.add_argument("-m", "--mlt", action="store_true", default=False, |
||
| 716 | help="indicate nominal (False, default) or MLT data (True)") |
||
| 717 | parser.add_argument("-X", "--xarray", action="store_true", default=False, |
||
| 718 | help="use xarray to prepare the dataset" |
||
| 719 | " (experimental, default %(default)s)") |
||
| 720 | loglevels = parser.add_mutually_exclusive_group() |
||
| 721 | loglevels.add_argument("-l", "--loglevel", default="WARNING", |
||
| 722 | choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], |
||
| 723 | help="change the loglevel (default: 'WARNING')") |
||
| 724 | loglevels.add_argument("-q", "--quiet", action="store_true", default=False, |
||
| 725 | help="less output, same as --loglevel=ERROR (default: False)") |
||
| 726 | loglevels.add_argument("-v", "--verbose", action="store_true", default=False, |
||
| 727 | help="verbose output, same as --loglevel=INFO (default: False)") |
||
| 728 | args = parser.parse_args() |
||
| 729 | if args.quiet: |
||
| 730 | logging.getLogger().setLevel(logging.ERROR) |
||
| 731 | elif args.verbose: |
||
| 732 | logging.getLogger().setLevel(logging.INFO) |
||
| 733 | else: |
||
| 734 | logging.getLogger().setLevel(args.loglevel) |
||
| 735 | |||
| 736 | logging.info("processing L2 version: %s", args.retrieval_version) |
||
| 737 | logging.info("writing data file version: %s", args.file_version) |
||
| 738 | |||
| 739 | pddrange = [] |
||
| 740 | if args.month is not None: |
||
| 741 | d0 = pd.to_datetime(args.month + "-01", utc=True) |
||
| 742 | pddrange += pd.date_range(d0, d0 + pd.tseries.offsets.MonthEnd()) |
||
| 743 | if args.date_range is not None: |
||
| 744 | pddrange += pd.date_range(*args.date_range.split(':')) |
||
| 745 | if args.dates is not None: |
||
| 746 | pddrange += pd.to_datetime(args.dates.split(','), utc=True) |
||
| 747 | logging.debug("pddrange: %s", pddrange) |
||
| 748 | |||
| 749 | olist = [] |
||
| 750 | for date in pddrange: |
||
| 751 | try: |
||
| 752 | olist += get_orbits_from_date(date.strftime("%Y-%m-%d"), |
||
| 753 | mlt=args.mlt, path=args.path, L2_version=args.retrieval_version) |
||
| 754 | except: # handle NaT |
||
| 755 | pass |
||
| 756 | if args.orbit_file is not None: |
||
| 757 | olist += np.genfromtxt(args.orbit_file, dtype=np.int32).tolist() |
||
| 758 | logging.debug("olist: %s", olist) |
||
| 759 | |||
| 760 | if not olist: |
||
| 761 | logging.warn("No orbits to process.") |
||
| 762 | return |
||
| 763 | |||
| 764 | sdlist, sdxr_ds = combine_orbit_data(olist, |
||
| 765 | ref_date=args.base_date, |
||
| 766 | L2_version=args.retrieval_version, file_version=args.file_version, |
||
| 767 | dens_path=args.path, spec_base=args.spectra, use_xarray=args.xarray, |
||
| 768 | save_nc=False) |
||
| 769 | sdlist.author = args.author |
||
| 770 | |||
| 771 | if args.xarray and sdxr_ds is not None: |
||
| 772 | sdxr_ds.attrs["author"] = args.author |
||
| 773 | sd_xr = sddata_xr_set_attrs( |
||
| 774 | sdxr_ds, ref_date=args.base_date, |
||
| 775 | rename=True, file_version=args.file_version, |
||
| 776 | ) |
||
| 777 | sd_xr2 = sdlist.to_xarray() |
||
| 778 | # Overwrite version-specific variable attributes |
||
| 779 | for _v, _a in VAR_ATTRS[args.file_version].items(): |
||
| 780 | sd_xr2[_v].attrs = _a |
||
| 781 | # version specific renaming |
||
| 782 | sd_xr2 = sd_xr2.rename(VAR_RENAME[args.file_version]) |
||
| 783 | logging.debug(sd_xr) |
||
| 784 | logging.debug(sd_xr2) |
||
| 785 | logging.debug("equal datasets: %s", sd_xr.equals(sd_xr2)) |
||
| 786 | xr.testing.assert_allclose(sd_xr, sd_xr2) |
||
| 787 | if sd_xr2 is not None: |
||
| 788 | logging.debug("xarray dataset: %s", sd_xr2) |
||
| 789 | sd_xr2.to_netcdf(args.file, unlimited_dims=["time"]) |
||
| 790 | else: |
||
| 791 | if sdlist.no_dens is not None: |
||
| 792 | sdlist.write_to_netcdf(args.file) |
||
| 793 | else: |
||
| 794 | logging.warn("Processed data is empty.") |
||
| 795 | |||
| 796 | |||
| 797 | if __name__ == "__main__": |
||
| 798 | main() |
||
| 799 |