# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8
#
# Copyright (c) 2017-2018 Stefan Bender
#
# This module is part of sciapy.
# sciapy is free software: you can redistribute it or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 2.
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
"""SCIAMACHY regression tool command line options

Command line options for the command line tool.
"""
|
15
|
|
|
|
|
16
|
1 |
|
import argparse |
|
17
|
1 |
|
from distutils.util import strtobool |
|
18
|
|
|
|
|
19
|
1 |
|
from ._gpkernels import george_kernels, celerite_terms |
|
20
|
|
|
|
|
21
|
1 |
|
__all__ = ["parser"] |
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
1 |
|
class Range(object):
    """Ranges for floats in command line arguments

    Helps to properly notify the user if the command line argument
    is out of range[1].

    An instance represents the closed interval ``[start, end]`` and is
    meant to be passed as ``choices=`` to ``argparse``. Membership
    (``value in Range(a, b)``) and equality (``Range(a, b) == value``)
    are both true exactly when ``start <= value <= end``. Iterating the
    instance yields the instance itself once, so that code which renders
    the choices via ``repr`` shows ``start--end``.

    [1](https://stackoverflow.com/questions/12116685/)

    Parameters
    ----------
    start: float
        Lower bound of the interval (inclusive).
    end: float
        Upper bound of the interval (inclusive).
    """
    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __contains__(self, value):
        """Return True if `value` lies within ``[start, end]``.

        Values that cannot be ordered against the bounds (e.g. strings)
        are simply not contained instead of raising ``TypeError``.
        """
        try:
            return self.start <= value <= self.end
        except TypeError:
            return False

    def __eq__(self, other):
        """Compare equal to every value inside ``[start, end]``.

        Returns ``NotImplemented`` for operands that cannot be ordered
        against the bounds, so that ``==`` falls back to Python's default
        comparison instead of raising ``TypeError``.
        """
        try:
            return self.start <= other <= self.end
        except TypeError:
            return NotImplemented

    def __getitem__(self, index):
        """Behave like a one-element sequence containing the range itself.

        Raises
        ------
        IndexError
            For every index other than 0, which ends legacy
            ``__getitem__``-based iteration after the first item.
        """
        if index == 0:
            return self
        raise IndexError()

    def __repr__(self):
        return '{0}--{1}'.format(self.start, self.end)
|
47
|
|
|
|
|
48
|
|
|
|
|
49
|
1 |
|
# Top-level argument parser of the `scia_regress` command line tool.
# The general (ungrouped) options are registered here; the MCMC, GP,
# output and log-level options are added to this parser further below.
parser = argparse.ArgumentParser(description="SCIAMACHY data regression",
        prog="scia_regress")

# --- Input and output locations ---
# NOTE(review): this positional has no `nargs="?"`, so argparse treats it
# as required and the default is presumably never used -- confirm intent.
parser.add_argument("file", default="SCIA_NO.nc",
        help="The filename of the input netcdf file")
parser.add_argument("-o", "--output_path", default=".",
        metavar="PATH",
        help="The directory for the output files (figures and MCMC samples) "
        "(default: %(default)s)")
parser.add_argument("-m", "--name_suffix", default="",
        help="The suffix for the figure plot files (default: \"\")")

# --- Proxy selection and proxy model options ---
parser.add_argument("--proxies", metavar="NAME1:FILE1,NAME2:FILE2,...",
        default="Sol:data/indices/lisird_lya_version3_daily_full.dat,"
        "GM:data/indices/AE_Kyoto_1980-2018_daily2_shift12h.dat",
        help="Comma separated list of (solar and geomagnetic or other) "
        "proxies as 'name:file' (default: %(default)s)")
parser.add_argument("-T", "--fit_lags", default="", type=str,
        metavar="NAME1,NAME2",
        help="Fit the proxy lag time "
        "(comma separated proxy names, e.g. Sol,GM) "
        "(default: %(default)s)")
parser.add_argument("-I", "--fit_lifetimes", default="", type=str,
        metavar="NAME1,NAME2",
        help="Fit the proxy life time "
        "(comma separated proxy names, e.g. Sol,GM), "
        "sets the proxy lag time to zero (default: %(default)s)")
parser.add_argument("--fit_annlifetimes", default="", type=str,
        metavar="NAME1,NAME2",
        help="Fit the proxy annual life time variations "
        "(comma separated proxy names, e.g. Sol,GM) "
        "(default: %(default)s)")
parser.add_argument("--fit_phase", action="store_true", default=False,
        help="Fit the phase of the harmonic terms directly "
        "instead of using separate cosine and sine terms "
        "(default: %(default)s)")
parser.add_argument("--use_sza", action="store_true", default=False,
        help="Fit the proxy annual life time variations "
        "using the (cosine and sine of the) solar zenith angle "
        "instead of the time (default: %(default)s)")
parser.add_argument("-t", "--lag_times",
        default="Sol:0,GM:0", type=str,
        metavar="NAME1:LAG1,NAME2:LAG2",
        help="Comma-separated list of name:value pairs of fixed proxy lags "
        "(in fractional years) (default: %(default)s)")
parser.add_argument("--center_proxies", default="", type=str,
        metavar="NAME1,NAME2",
        help="Comma-separated list of proxies to center "
        "by subtracting the mean (default: %(default)s)")
parser.add_argument("--log_proxies", default="", type=str,
        metavar="NAME1,NAME2",
        help="Comma-separated list of proxies to take the logarithm of "
        "for fitting (default: %(default)s)")
parser.add_argument("--positive_proxies", default="", type=str,
        metavar="NAME1,NAME2",
        help="Comma-separated list of proxies with positive coefficients. "
        "Changes the parameter bounds for these proxies accordingly "
        "(default: %(default)s)")
parser.add_argument("--norm_proxies_distSEsq", default="", type=str,
        metavar="NAME1,NAME2",
        help="Comma-separated list of proxies to be normalized by the "
        "Sun-Earth distance squared, for example the Lyman-alpha radiation "
        "(default: %(default)s)")
parser.add_argument("--norm_proxies_SZA", default="", type=str,
        metavar="NAME1,NAME2",
        help="Comma-separated list of proxies to be normalized by "
        "the solar zenith angle, for example to adjust the Lyman-alpha "
        "radiation for the seasonal effects at different latitudes "
        "(default: %(default)s)")
parser.add_argument("--time_format", default="jyear", type=str,
        choices=['jyear', 'decimalyear', 'jd', 'mjd'],
        help="Treat the time units (proxy and data) according to the given "
        "astropy.time time format. (default: %(default)s)")

# --- Regression model options ---
parser.add_argument("-k", "--fit_offset", action="store_true", default=False,
        help="Fit an additional offset via regression (default: %(default)s)")
parser.add_argument("-F", "--freqs", default="1, 2", type=str,
        metavar="FREQ1,FREQ2",
        help="Comma separated list of frequencies (in inverse years) to fit "
        "(default: %(default)s)")
parser.add_argument("--lifetime_scan", default=0, type=int,
        help="Number of days to go back to estimate the lifetime. "
        "If set to zero or negative, the scan range will be set to "
        "three times the maximum lifetime, including the annual variation "
        "(default: %(default)s)")
parser.add_argument("--lifetime_prior", default=None, type=str,
        choices=[None, 'flat', 'exp', 'normal'],
        help="The prior probability density for the lifetimes "
        "(default: %(default)s)")
parser.add_argument("--lifetime_metric", default=1, type=float,
        help="The prior probability density metric for the lifetimes in days "
        "(default: %(default)s)")
parser.add_argument("--center_data", action="store_true", default=False,
        help="Center the data by subtracting a global mean (default: %(default)s)")
parser.add_argument("--initial", metavar="values", default=None, type=str,
        help="Comma separated list of initial parameter values "
        "(default: %(default)s)")
parser.add_argument("-i", "--linearise", action="store_true", default=False,
        help="Use the linearised version of the model (default: %(default)s).")

# --- Data selection ---
parser.add_argument("-A", "--altitude", metavar="km",
        type=float, default=72,
        help="Altitude bin [km] (default: %(default)s)")
parser.add_argument("-L", "--latitude", metavar="degN",
        type=float, default=62.5,
        help="Latitude bin [°N] (default: %(default)s)")
parser.add_argument("--season", default=None,
        choices=[None, 'summerNH', 'summerSH'],
        help="Select a particular season (default: %(default)s)")
parser.add_argument("--exclude_spe", action="store_true", default=False,
        help="Exclude pre-defined SPE events (default: %(default)s)")
parser.add_argument("--akd_threshold", default=0.002, type=float,
        metavar="VALUE",
        help="Exclude data with an averaging kernel diagonal element "
        "smaller than the given threshold (default: %(default)s)")
parser.add_argument("--cnt_threshold", default=0, type=int,
        metavar="VALUE",
        help="Exclude data with less than the given number of measurement points "
        "in the averaged bin (default: %(default)s)")
parser.add_argument("-s", "--scale", metavar="factor",
        type=float, default=1e-6,
        help="Scale the data by factor prior to fitting (default: %(default)s)")

# --- Sub-sampling and train/test splitting ---
parser.add_argument("-r", "--random_subsample", metavar="factor",
        type=int, default=1,
        help="Randomly subsample the data by the given factor "
        "(default: 1, no subsampling)")
# `Range` makes argparse reject fractions outside of [0, 1].
parser.add_argument("--train_fraction", metavar="factor",
        type=float, default=1, choices=Range(0., 1.),
        help="Use the given fraction of the data points to train the model "
        "(default: 1, train on all points)")
parser.add_argument("--test_fraction", metavar="factor",
        type=float, default=1, choices=Range(0., 1.),
        help="Use the given fraction of the data points to test the model "
        "(default: test on (1 - train_fraction) or all points)")
# On/off flag pair sharing one destination; the description belongs to
# the enabling flag, the default is set for both via `set_defaults()`.
parser.add_argument("--random_train_test", dest="random_train_test",
        action="store_true",
        help="Randomize the data before splitting into train and test sets "
        "(default: %(default)s).")
parser.add_argument("--no-random_train_test", dest="random_train_test",
        action="store_false",
        help="Disable --random_train_test.")
parser.set_defaults(random_train_test=False)

# --- Runtime environment ---
parser.add_argument("--scheduler_address", metavar="address:port",
        default=None,
        help="Connect to dask scheduler at address:port "
        "(default: %(default)s)")
parser.add_argument("--scheduler_file", metavar="file",
        default=None,
        help="Connect to dask scheduler using the scheduler file "
        "(default: %(default)s)")
# Integer default to match `type=int` (argparse converted the former
# string default "1" to the same value).
parser.add_argument("-O", "--optimize", type=int, default=1,
        choices=range(5),
        help="Optimize the parameters before MCMC run with method no.: "
        "0: no optimization, 1: Powell, "
        "2: differential evolution with latin hypercube initialization, "
        "3: basin hopping (experimental), and "
        "4: least squares curve fitting (experimental) "
        "(default: %(default)s)")
parser.add_argument("-N", "--openblas_threads", metavar="N",
        type=int, default=None,
        help="Use N OpenMP/OpenBlas threads. If not set uses the "
        "environment settings (OMP_NUM_THREADS and OPENBLAS_NUM_THREADS) "
        "or the library's default (default: %(default)s)")
parser.add_argument("-S", "--random_seed", metavar="VALUE",
        type=int, default=None,
        help="Use a particular random seed to obtain "
        "reproducible results (default: %(default)s)")
|
211
|
1 |
|
# Options controlling the MCMC sampling, shown as a separate section
# in the `--help` output.
group_mcmc = parser.add_argument_group(title="MCMC parameters",
        description="Fine-tuning of the (optional) MCMC run.")
# The string default is passed through `type` by argparse, so the parsed
# value is whatever `strtobool("true")` returns.
# NOTE(review): `strtobool` is imported from `distutils`, which was removed
# from the standard library in Python 3.12 (PEP 632) -- needs a replacement
# before running on newer interpreters.
group_mcmc.add_argument("-M", "--mcmc", type=strtobool, default="true",
        help="Fit the parameters with MCMC (default: %(default)s)")
group_mcmc.add_argument("-w", "--walkers", metavar="N",
        type=int, default=100,
        help="Use N MCMC walkers (default: %(default)s)")
# `--optimize` is an integer method number, 0 meaning "no optimization".
group_mcmc.add_argument("-b", "--burn_in", metavar="N",
        type=int, default=200,
        help="Use N MCMC burn-in samples "
        "(run twice if --optimize is 0) (default: %(default)s)")
group_mcmc.add_argument("-p", "--production", metavar="N",
        type=int, default=800,
        help="Use N MCMC production samples (default: %(default)s)")
group_mcmc.add_argument("-n", "--threads", metavar="N",
        type=int, default=1,
        help="Use N MCMC threads (default: %(default)s)")
group_mcmc.add_argument("-P", "--progress", action="store_true", default=False,
        help="Show MCMC sampler progress (default: %(default)s)")
|
230
|
1 |
|
# Gaussian Process related switches get their own `--help` section.
group_gp = parser.add_argument_group(
    description="Fine-tuning of the (optional) Gaussian Process parameters.",
    title="GP parameters")
group_gp.add_argument(
    "-g", "--george", default=False, action="store_true",
    help=(
        "Optimize a Gaussian Process model of the correlations "
        "using the `celerite` (not set) or `george` GP packages "
        "(default: %(default)s)"
    ))
# The known kernel/term names are taken from the two imported lookup
# tables and interpolated into the help text in sorted order.
group_gp.add_argument(
    "-K", "--kernels", type=str, default="",
    help=(
        "Comma separated list of Gaussian Process kernels to use. "
        "They will be combined linearly (default: %(default)s) "
        "Possible choices are: {0} for george (-g) and {1} for celerite"
    ).format(
        sorted(str(name) for name in george_kernels.keys()),
        sorted(str(name) for name in celerite_terms.keys())
    ))
group_gp.add_argument(
    "-B", "--fit_bias", default=False, action="store_true",
    help="Fit bias using a constant kernel (default: %(default)s)")
group_gp.add_argument(
    "-W", "--fit_white", default=False, action="store_true",
    help="Fit additional white noise (default: %(default)s)")
group_gp.add_argument(
    "-H", "--HODLR_Solver", default=False, action="store_true",
    help="Use the HODLR solver for the GP fit (default: %(default)s)")
|
248
|
1 |
|
# Options selecting which diagnostic files and figures are written.
# Every switch is an on/off pair (`--name` / `--no-name`) sharing one
# destination; the description is attached to the enabling flag and the
# defaults for all pairs are set in the `set_defaults()` call at the end.
group_save = parser.add_argument_group(title="Output options",
        description="Diagnostic output and figures.")
group_save.add_argument("--save_model", dest="save_model", action="store_true",
        help="Saves a pickled version of the Model (default: %(default)s).")
group_save.add_argument("--no-save_model", dest="save_model", action="store_false",
        help="Disable --save_model.")
group_save.add_argument("--save_samples", dest="save_samples", action="store_true",
        help="Saves the MCMC samples to disk (see --samples_format) "
        "(default: %(default)s).")
group_save.add_argument("--no-save_samples", dest="save_samples", action="store_false",
        help="Disable --save_samples.")
group_save.add_argument("--samples_format", default="netcdf4",
        choices=['npz', 'h5', 'hdf5', 'nc', 'netcdf4'],
        help="File format for the samples, compressed .npz or netcdf4 (hdf5) "
        "(h5 and hdf5 will also save to netcdf4 files but named \".h5\") "
        "(default: %(default)s).")
group_save.add_argument("--plot_corner", dest="plot_corner", action="store_true",
        help="Plot the fitted parameter distributions as a corner plot "
        "(default: %(default)s).")
group_save.add_argument("--no-plot_corner", dest="plot_corner", action="store_false",
        help="Disable --plot_corner.")
group_save.add_argument("--plot_samples", dest="plot_samples", action="store_true",
        help="Plot sample predictions using the fitted parameters "
        "(default: %(default)s).")
group_save.add_argument("--no-plot_samples", dest="plot_samples", action="store_false",
        help="Disable --plot_samples.")
group_save.add_argument("--plot_median", dest="plot_median", action="store_true",
        help="Plot median prediction and the residuals combined "
        "(default: %(default)s).")
group_save.add_argument("--no-plot_median", dest="plot_median", action="store_false",
        help="Disable --plot_median.")
group_save.add_argument("--plot_residuals", dest="plot_residuals", action="store_true",
        help="Plot standalone median prediction residuals "
        "(default: %(default)s).")
group_save.add_argument("--no-plot_residuals", dest="plot_residuals", action="store_false",
        help="Disable --plot_residuals.")
group_save.add_argument("--plot_maxlnp", dest="plot_maxlnp", action="store_true",
        help="Plot the maximum posterior prediction and the residuals combined "
        "(default: %(default)s).")
group_save.add_argument("--no-plot_maxlnp", dest="plot_maxlnp", action="store_false",
        help="Disable --plot_maxlnp.")
group_save.add_argument("--plot_maxlnpres", dest="plot_maxlnpres", action="store_true",
        help="Plot standalone maximum posterior prediction residuals "
        "(default: %(default)s).")
group_save.add_argument("--no-plot_maxlnpres", dest="plot_maxlnpres", action="store_false",
        help="Disable --plot_maxlnpres.")
# `save_samples` is a boolean switch; its default used to be the (truthy)
# string "netcdf4", which belongs to `--samples_format`. It is now a real
# boolean so the value has the same type whether or not the flag is given.
group_save.set_defaults(save_model=False, save_samples=True,
        plot_corner=True, plot_samples=True, plot_median=False,
        plot_residuals=False, plot_maxlnp=True, plot_maxlnpres=False)
|
289
|
1 |
|
# Verbosity control: the three options are registered in one mutually
# exclusive group, so argparse refuses to accept more than one of them.
loglevels = parser.add_mutually_exclusive_group()
loglevels.add_argument(
    "-q", "--quiet", default=False, action="store_true",
    help="less output, same as --loglevel=ERROR (default: %(default)s)")
loglevels.add_argument(
    "-v", "--verbose", default=False, action="store_true",
    help="verbose output, same as --loglevel=INFO (default: %(default)s)")
loglevels.add_argument(
    "-l", "--loglevel",
    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    default="WARNING",
    help="change the loglevel (default: %(default)s)")
|
297
|
|
|
|