#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8
#
# Copyright (c) 2017-2018 Stefan Bender
#
# This file is part of sciapy.
# sciapy is free software: you can redistribute it or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 2.
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
"""SCIAMACHY level 2 binning

Command line interface to SCIAMACHY level 2 data binning,
both in time (daily) and (geomagnetic) latitude.
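
Example invocation (hypothetical script and output file names,
the options correspond to the argument parser defined below)::

    python scia_binning.py SCIA_NO.nc -v -o scia_no_dzm.nc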
""" |
|
17
|
|
|
|
|
18
|
|
|
import argparse as ap |
|
19
|
|
|
import datetime as dt |
|
20
|
|
|
import logging |
|
21
|
|
|
from os import path |
|
22
|
|
|
|
|
23
|
|
|
import numpy as np |
|
24
|
|
|
import pandas as pd |
|
25
|
|
|
import xarray as xr |
|
26
|
|
|
try: |
|
27
|
|
|
from dask import compute, delayed |
|
28
|
|
|
from dask.distributed import Client |
|
29
|
|
|
except ImportError: |
|
30
|
|
|
delayed = None |
|
31
|
|
|
|
|
32
|
|
|
from sciapy.level2.binning import bin_lat_timeavg |
|
33
|
|
|
|
|
34
|
|
|
# non-sensible variables to drop |
|
35
|
|
|
_drop_vars = ["NO_RSTD_cnt", "NO_RSTD_std"] |
|
36
|
|
|
|
|
37
|
|
|
|
|
38
|
|
|
if __name__ == "__main__": |
|
39
|
|
|
logging.basicConfig(level=logging.WARNING, |
|
40
|
|
|
format="[%(levelname)-8s] (%(asctime)s) %(filename)s:%(lineno)d %(message)s", |
|
41
|
|
|
datefmt="%Y-%m-%d %H:%M:%S %z") |
|
42
|
|
|
|
|
43
|
|
|
parser = ap.ArgumentParser() |
|
44
|
|
|
parser.add_argument("file", default="SCIA_NO.nc", |
|
45
|
|
|
help="the filename of the input netcdf file") |
|
46
|
|
|
parser.add_argument("-a", "--area_weighted", |
|
47
|
|
|
action="store_true", default=True, |
|
48
|
|
|
help="calculate the area-weighted mean within the bins") |
|
49
|
|
|
parser.add_argument("-u", "--unweighted", |
|
50
|
|
|
dest="area_weighted", action="store_false", |
|
51
|
|
|
help="calculate the equally weighted mean within the bins") |
|
52
|
|
|
parser.add_argument("-g", "--geomagnetic", |
|
53
|
|
|
dest="geomag", action="store_true", default=False, |
|
54
|
|
|
help="bin according to geomagnetic latitude instead of " |
|
55
|
|
|
"geographic latitude (turns off area weighting). " |
|
56
|
|
|
"(default: %(default)s)") |
|
57
|
|
|
parser.add_argument("-G", "--bin_var", type=str, default=None, |
|
58
|
|
|
help="bin according to the variable given instead of " |
|
59
|
|
|
"geographic latitude (turns off area weighting).") |
|
60
|
|
|
parser.add_argument("-b", "--bins", metavar="START:END:SIZE", |
|
61
|
|
|
default="-90:90:5", |
|
62
|
|
|
help="bins from START to END (inclusive both) with into SIZE sized bins " |
|
63
|
|
|
"(default: %(default)s)") |
|
64
|
|
|
parser.add_argument("-B", "--binn", metavar="START:END:NUM", |
|
65
|
|
|
default=None, |
|
66
|
|
|
help="bins from START to END (inclusive both) into NUM bins " |
|
67
|
|
|
"(default: not used)") |
|
68
|
|
|
parser.add_argument("-m", "--mlt", action="store_true", default=False, |
|
69
|
|
|
help="indicate whether to deal with nominal or MLT data (default: False)") |
|
70
|
|
|
parser.add_argument("-o", "--output", help="filename of the output file") |
|
71
|
|
|
parser.add_argument("-t", "--akm_threshold", type=float, default=0.002, |
|
72
|
|
|
help="the averaging kernel diagonal element threshold " |
|
73
|
|
|
"for the mask calculation " |
|
74
|
|
|
"(default: %(default)s)") |
|
75
|
|
|
parser.add_argument("-j", "--jobs", metavar="N", type=int, default=1, |
|
76
|
|
|
help="Use N parallel threads for binning " |
|
77
|
|
|
"(default: %(default)s)") |
|
78
|
|
|
loglevels = parser.add_mutually_exclusive_group() |
|
79
|
|
|
loglevels.add_argument("-l", "--loglevel", default="WARNING", |
|
80
|
|
|
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], |
|
81
|
|
|
help="change the loglevel " |
|
82
|
|
|
"(default: %(default)s)") |
|
83
|
|
|
loglevels.add_argument("-q", "--quiet", action="store_true", default=False, |
|
84
|
|
|
help="less output, same as --loglevel=ERROR " |
|
85
|
|
|
"(default: %(default)s)") |
|
86
|
|
|
loglevels.add_argument("-v", "--verbose", action="store_true", default=False, |
|
87
|
|
|
help="verbose output, same as --loglevel=INFO " |
|
88
|
|
|
"(default: %(default)s)") |
|
89
|
|
|
args = parser.parse_args() |
|
90
|
|
|
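    # map the verbosity options onto the root logger level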
    if args.quiet:
        logging.getLogger().setLevel(logging.ERROR)
    elif args.verbose:
        logging.getLogger().setLevel(logging.INFO)
    else:
        logging.getLogger().setLevel(args.loglevel)

    orbit_filename = args.file

    # geomagnetic/geographic setup
    if args.geomag:
        logging.debug("using default geomagnetic latitudes")
        binvar = "gm_lats"
        args.area_weighted = False
    elif args.bin_var is not None:
        logging.debug("using custom latitude variable")
        binvar = args.bin_var
        args.area_weighted = False
    else:
        logging.debug("using default geographic latitudes")
        binvar = "latitude"
    logging.info("binning according to \"%s\"", binvar)
    lats_rename_dict = {"{0}_bins".format(binvar): "latitude"}

    if args.area_weighted:
        logging.info("area weighted bins")
    else:
        logging.info("equally weighted bins")

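    # construct the latitude bin edges, either with a fixed bin size
    # (-b START:END:SIZE) or with a fixed number of bins (-B START:END:NUM)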
    if args.binn is None:
        bin0, bin1, binstep = list(map(float, args.bins.split(':')))
        bins = np.r_[bin0:bin1 + 0.5 * binstep:binstep]
    else:
        bin0, bin1, binnum = list(map(float, args.binn.split(':')))
        # np.linspace() requires an integer number of points
        bins = np.linspace(bin0, bin1, int(binnum) + 1)
        # ndarrays have no .diff attribute, use np.diff() for the bin size
        binstep = np.diff(bins)[0]
    logging.debug("using %s deg sized bins: %s", binstep, bins)

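    # derive a default output filename from the input filename, the averaging
    # kernel threshold, the latitude type, the bin size, and the weighting,
    # unless -o/--output was given explicitly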
    if args.output is None:
        output = ("scia_dzm_{0}_akm{1:.3f}_{2}{3:.0f}_{4}.nc"
            .format("".join(c if c.isalnum() else '_'
                    for c in path.basename(orbit_filename[:-3])),
                args.akm_threshold,
                "geomag" if args.geomag else "geogra",
                binstep,
                "aw" if args.area_weighted else "nw"))
    else:
        output = args.output
    logging.info("saving to: %s", output)

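    # lazily open the (possibly multi-file) netcdf input with dask-backed chunks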
    ds = xr.open_mfdataset(orbit_filename, decode_times=False,
        chunks={"time": 820, "latitude": 18, "altitude": 17})
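    # wrap the longitudes to [0, 360)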
ds["longitude"].values = ds.longitude.values % 360. |
|
143
|
|
|
|
|
144
|
|
|
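    # group the data into daily chunks, either from explicit time-bin edges
    # (MLT data) or by calendar date (nominal data)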
    if args.mlt:
        # construct the time (day) bin edges from jumps in the time variable
        # works reliably only for the MLT data
        time_rename_dict = {"time_bins": "time"}
        tbin_edges = np.concatenate([[ds.time.values[0] - 0.5],
            ds.time.values[np.where(np.diff(ds.time) > 1)] + 0.01,
            [ds.time.values[-1] + 0.5]])
        tbin_labels = ds.time.groupby_bins("time", tbin_edges).mean("time").values
        ds_bins_daily_gb = ds.groupby_bins("time", tbin_edges, labels=tbin_labels)
    else:
        time_rename_dict = {"date": "time"}
        ds["time"] = xr.conventions.decode_cf_variable("time", ds.time)
        # ds.groupby("time.date") does not work anymore :(
        ds_bins_daily_gb = ds.groupby(
            xr.DataArray(
                pd.to_datetime(pd.DatetimeIndex(ds.time.data).date),
                coords=[ds.time], dims=["time"], name="date"))

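    # bin each daily group in latitude, in parallel via dask if requested
    # (and dask is available), otherwise serially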
    if args.jobs > 1 and delayed is not None:
        # use dask.delayed and dask.compute to distribute the binning
        logging.info("multi-threaded binning with dask using %s threads",
            args.jobs)
        binned = (delayed(bin_lat_timeavg)(
                ds, binvar=binvar,
                bins=bins, area_weighted=args.area_weighted)
            for _, ds in iter(ds_bins_daily_gb))
        client = Client()
        logging.info("dask.distributed client: %s", client)
        ds_bins_daily = (ds_bins_daily_gb
            ._combine(compute(*binned, num_workers=args.jobs))
            .rename(lats_rename_dict)
            .drop(_drop_vars))
    else:
        # serial binning with .apply()
        logging.info("single-threaded binning")
        ds_bins_daily = (ds_bins_daily_gb
            .apply(bin_lat_timeavg,
                binvar=binvar, bins=bins,
                area_weighted=args.area_weighted)
            .rename(lats_rename_dict)
            .drop(_drop_vars))
    logging.info("finished binning.")
    del ds_bins_daily_gb

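    # rename the grouping dimension back to "time" and restore its attributes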
    ds_bins_daily = ds_bins_daily.rename(time_rename_dict)
    ds_bins_daily["time"].attrs = ds["time"].attrs
    ds_bins_daily["time"].attrs.update(
        {"axis": "T", "long_name": "measurement date"})

    # construct the mask from the averaging kernel diagonal elements
    ds_bins_daily["NO_MASK"] = (ds_bins_daily.NO_AKDIAG < args.akm_threshold)
    ds_bins_daily["NO_MASK"].attrs = {"long_name": "density mask", "units": "1"}

    # copy coordinate attributes
    # "time" was already set above
    for var in filter(lambda c: c != "time", ds.coords):
        logging.debug("copying coordinate attributes for: %s", var)
        ds_bins_daily[var].attrs = ds[var].attrs
    if args.geomag:
        ds_bins_daily["latitude"].attrs.update(
            {"long_name": "geomagnetic_latitude"})
    # copy global attributes
    ds_bins_daily.attrs = ds.attrs
    # note binning time
    ds_bins_daily.attrs["binned_on"] = (dt.datetime.utcnow()
        .replace(tzinfo=dt.timezone.utc)
        .strftime("%a %b %d %Y %H:%M:%S %Z"))
    ds_bins_daily.attrs["latitude_bin_type"] = \
        "geomagnetic" if args.geomag else "geographic"

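    # write the binned daily zonal means to netcdf, with "time" as unlimited dimension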
    ds_bins_daily.to_netcdf(output, unlimited_dims=["time"])