Passed
Push — master (b988a0...edd9bf) by Stefan, created 05:54

sciapy.regress.__main__ — Rating: F

Complexity
Total Complexity: 65

Size/Duplication
Total Lines: 536
Duplicated Lines: 90.11%

Test Coverage
Coverage: 8.68%

Importance
Changes: 0
Metric  Value   Notes
eloc    389     executable lines of code
dl      483     duplicated lines (90.11% of 536)
loc     536     lines of code
ccs     25      covered statements
cts     288     total statements
cp      0.0868  coverage proportion (ccs/cts = 25/288 ≈ 0.0868, i.e. 8.68%)
rs      3.2
c       0
b       0
f       0
wmc     65      weighted method count (the total complexity)

4 Functions

Rating  Name                    Duplication  Size  Complexity
A       _r_sun_earth()                   24    24           1
A       _train_test_split()              29    29           3
F       main()                          387   387          54
B       save_samples_netcdf()            43    43           7

How to fix

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.

Common duplication problems and their solutions are illustrated by the sketch below.
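The most common fix is to extract the repeated logic into a single shared helper. The following is a minimal, hypothetical sketch (the function names are illustrative and not taken from sciapy):

	import numpy as np

	# Before: both loaders repeat the same clean-up steps.
	def load_series_a(values):
		arr = np.asarray(values, dtype=float)
		arr = arr[np.isfinite(arr)]            # drop NaN/inf entries
		return (arr - arr.mean()) / arr.std()  # center and scale

	def load_series_b(values):
		arr = np.asarray(values, dtype=float)
		arr = arr[np.isfinite(arr)]
		return (arr - arr.mean()) / arr.std()

	# After: the shared steps live in one helper that both loaders call,
	# so any future change to the clean-up happens in exactly one place.
	def _clean_and_standardize(values):
		arr = np.asarray(values, dtype=float)
		arr = arr[np.isfinite(arr)]
		return (arr - arr.mean()) / arr.std()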

Complexity

Tip: Before tackling complexity, make sure that you eliminate any duplication first. This can often reduce the size of classes significantly.

Complex classes like sciapy.regress.__main__ often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster. A minimal sketch of Extract Class follows.
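A minimal, hypothetical sketch of Extract Class (the class and method names are illustrative, not from sciapy): the report_* methods share a prefix, so they move into their own cohesive class:

	# After extraction: reporting is its own class instead of a bundle of
	# report_* methods on Analysis.
	class Report:
		def __init__(self, analysis):
			self.analysis = analysis

		def summary(self):  # was Analysis.report_summary()
			return "mean = {0:.3f}".format(self.analysis.fit())

	class Analysis:
		def __init__(self, data):
			self.data = data
			self.report = Report(self)  # delegate reporting

		def fit(self):
			return sum(self.data) / len(self.data)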

# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8
#
# Copyright (c) 2017-2018 Stefan Bender
#
# This module is part of sciapy.
# sciapy is free software: you can redistribute it or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 2.
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
"""SCIAMACHY data regression command line interface

Command line main program for regression analysis of SCIAMACHY
daily zonal mean time series (NO for now).
"""
import ctypes
import logging

import numpy as np
import scipy.optimize as op
from scipy.interpolate import interp1d

import george
import celerite

import matplotlib as mpl
# switch off X11 rendering
mpl.use("Agg")

from .load_data import load_solar_gm_table, load_scia_dzm
from .models_cel import trace_gas_model
from .mcmc import mcmc_sample_model
from .statistics import mcmc_statistics

from ._gpkernels import (george_solvers,
		setup_george_kernel, setup_celerite_terms)
from ._plot import (plot_single_sample_and_residuals,
		plot_residual, plot_random_samples)
from ._options import parser

def save_samples_netcdf(filename, model, alt, lat, samples,
		scale=1e-6,
		lnpost=None, compressed=False):
	from xarray import Dataset
	smpl_ds = Dataset(dict([(pname, (["lat", "alt", "sample"],
				samples[..., i].reshape(1, 1, -1)))
				for i, pname in enumerate(model.get_parameter_names())]
				# + [("lpost", (["lat", "alt", "sample"], lnp.reshape(1, 1, -1)))]
			),
			coords={"lat": [lat], "alt": [alt]})

	for modn in model.mean.models:
		modl = model.mean.models[modn]
		if hasattr(modl, "mean"):
			smpl_ds.attrs[modn + ":mean"] = modl.mean

	units = {"kernel": {
				"log": "log(10$^{{{0:.0f}}}$ cm$^{{-3}}$)"
						.format(-np.log10(scale))},
			"mean": {
				"log": "log(10$^{{{0:.0f}}}$ cm$^{{-3}}$)"
						.format(-np.log10(scale)),
				"cos": "10$^{{{0:.0f}}}$ cm$^{{-3}}$".format(-np.log10(scale)),
				"sin": "10$^{{{0:.0f}}}$ cm$^{{-3}}$".format(-np.log10(scale)),
				"val": "10$^{{{0:.0f}}}$ cm$^{{-3}}$".format(-np.log10(scale)),
				"amp": "10$^{{{0:.0f}}}$ cm$^{{-3}}$".format(-np.log10(scale)),
				"tau": "d"}}
	for pname in smpl_ds.data_vars:
		_pp = pname.split(':')
		for _n, _u in units[_pp[0]].items():
			if _pp[-1].startswith(_n):
				logging.debug("units for %s: %s", pname, _u)
				smpl_ds[pname].attrs["units"] = _u

	smpl_ds["alt"].attrs = {"long_name": "altitude", "units": "km"}
	smpl_ds["lat"].attrs = {"long_name": "latitude", "units": "degrees_north"}

	_encoding = None
	if compressed:
		_encoding = {var: {"zlib": True, "complevel": 1}
					for var in smpl_ds.data_vars}
	smpl_ds.to_netcdf(filename, encoding=_encoding)
	smpl_ds.close()

def _train_test_split(times, data, errs, train_frac,
		test_frac, randomize):
	# split the data into training and test subsets according to the
	# fraction given (default is 1, i.e. no splitting)
	ndata = len(times)
	train_size = int(ndata * train_frac)
	test_size = min(ndata - train_size, int(ndata * test_frac))
	# randomize if requested
	if randomize:
		permut_idx = np.random.permutation(np.arange(ndata))
	else:
		permut_idx = np.arange(ndata)
	train_idx = np.sort(permut_idx[:train_size])
	test_idx = np.sort(permut_idx[train_size:train_size + test_size])
	times_train = times[train_idx]
	data_train = data[train_idx]
	errs_train = errs[train_idx]
	if test_size > 0:
		times_test = times[test_idx]
		data_test = data[test_idx]
		errs_test = errs[test_idx]
	else:
		times_test = times
		data_test = data
		errs_test = errs
	logging.info("using %s of %s samples for training.", len(times_train), ndata)
	logging.info("using %s of %s samples for testing.", len(times_test), ndata)
	return (times_train, data_train, errs_train,
			times_test, data_test, errs_test)

def _r_sun_earth(time, tfmt="jyear"):
	"""First order approximation of the Sun-Earth distance

	The Sun-to-Earth distance can be used to (un-)normalize proxies
	to the actual distance to the Sun instead of 1 AU.

	Parameters
	----------
	time : float
		Time value in the units given by 'tfmt'.
	tfmt : str, optional
		The units of 'time' as supported by the
		astropy.time time formats. Default: 'jyear'.

	Returns
	-------
	dist : float
		The Sun-Earth distance at the given day of year in AU.
	"""
	from astropy.time import Time
	tdoy = Time(time, format=tfmt)
	tdoy.format = "yday"
	doy = int(tdoy.value.split(':')[1])
	return 1 - 0.01672 * np.cos(2 * np.pi / 365.256363 * (doy - 4))

def main():
	logging.basicConfig(level=logging.WARNING,
			format="[%(levelname)-8s] (%(asctime)s) "
				"%(filename)s:%(lineno)d %(message)s",
			datefmt="%Y-%m-%d %H:%M:%S %z")

	args = parser.parse_args()

	logging.info("command line arguments: %s", args)
	if args.quiet:
		logging.getLogger().setLevel(logging.ERROR)
	elif args.verbose:
		logging.getLogger().setLevel(logging.INFO)
	else:
		logging.getLogger().setLevel(args.loglevel)

	from numpy.distutils.system_info import get_info
	for oblas_path in get_info("openblas")["library_dirs"]:
		oblas_name = "{0}/libopenblas.so".format(oblas_path)
		logging.info("Trying %s", oblas_name)
		try:
			oblas_lib = ctypes.cdll.LoadLibrary(oblas_name)
			oblas_cores = oblas_lib.openblas_get_num_threads()
			oblas_lib.openblas_set_num_threads(args.openblas_threads)
			logging.info("Using %s/%s Openblas thread(s).",
					oblas_lib.openblas_get_num_threads(), oblas_cores)
		except Exception:
			logging.info("Setting number of openblas threads failed.")

	if args.random_seed is not None:
		np.random.seed(args.random_seed)

	if args.proxies:
		proxies = args.proxies.split(',')
		proxy_dict = dict(_p.split(':') for _p in proxies)
	else:
		proxy_dict = {}
	lag_dict = {pn: 0 for pn in proxy_dict.keys()}

	# Post-processing of arguments...
	# List of proxy lag fits from csv
	fit_lags = args.fit_lags.split(',')
	# List of proxy lifetime fits from csv
	fit_lifetimes = args.fit_lifetimes.split(',')
	fit_annlifetimes = args.fit_annlifetimes.split(',')
	# List of proxy lag times from csv
	lag_dict.update(dict(_ls.split(':') for _ls in args.lag_times.split(',')))
	# List of cycles (frequencies in 1/year) from argument list (csv)
	try:
		freqs = list(map(float, args.freqs.split(',')))
	except ValueError:
		freqs = []
	# List of initial parameter values
	initial = None
	if args.initial is not None:
		try:
			initial = list(map(float, args.initial.split(',')))
		except ValueError:
			pass
	# List of GP kernels from argument list (csv)
	kernls = args.kernels.split(',')

	lat = args.latitude
	alt = args.altitude
	logging.info("location: %.0f°N %.0f km", lat, alt)

	no_ys, no_dens, no_errs, no_szas = load_scia_dzm(args.file, alt, lat,
			tfmt=args.time_format,
			scale=args.scale,
			#subsample_factor=args.random_subsample,
			#subsample_method="random",
			akd_threshold=args.akd_threshold,
			cnt_threshold=args.cnt_threshold,
			center=args.center_data,
			season=args.season,
			SPEs=args.exclude_spe)

	(no_ys_train, no_dens_train, no_errs_train,
		no_ys_test, no_dens_test, no_errs_test) = _train_test_split(
				no_ys, no_dens, no_errs, args.train_fraction,
				args.test_fraction, args.random_train_test)

	sza_intp = interp1d(no_ys, no_szas, bounds_error=False)

	max_amp = 1e10 * args.scale
	max_days = 100

	proxy_config = {}
	for pn, pf in proxy_dict.items():
		pt, pp = load_solar_gm_table(pf, cols=[0, 1], names=["time", pn], tfmt=args.time_format)
		# use log of proxy values
		pv = np.log(pp[pn]) if pn in args.log_proxies.split(',') else pp[pn]
		# normalize to sun--earth distance squared
		if pn in args.norm_proxies_distSEsq.split(','):
			rad_sun_earth = np.vectorize(_r_sun_earth)(pt, tfmt=args.time_format)
			pv /= rad_sun_earth**2
		# normalize by cos(SZA)
		if pn in args.norm_proxies_SZA.split(',') and sza_intp is not None:
			pv *= np.cos(np.radians(sza_intp(pt)))
		proxy_config.update({pn:
			dict(times=pt, values=pv,
				center=pn in args.center_proxies.split(','),
				positive=pn in args.positive_proxies.split(','),
				lag=float(lag_dict[pn]),
				max_amp=max_amp, max_days=max_days,
				sza_intp=sza_intp if args.use_sza else None,
			)}
		)

	model = trace_gas_model(constant=args.fit_offset,
			proxy_config=proxy_config, **vars(args))

	logging.debug("model dict: %s", model.get_parameter_dict())
	model.freeze_all_parameters()
	# thaw parameters according to requested fits
	for pn in proxy_dict.keys():
		model.thaw_parameter("{0}:amp".format(pn))
		if pn in fit_lags:
			model.thaw_parameter("{0}:lag".format(pn))
		if pn in fit_lifetimes:
			model.set_parameter("{0}:tau0".format(pn), 1e-3)
			model.thaw_parameter("{0}:tau0".format(pn))
			if pn in fit_annlifetimes:
				model.thaw_parameter("{0}:taucos1".format(pn))
				model.thaw_parameter("{0}:tausin1".format(pn))
	for freq in freqs:
		if not args.fit_phase:
			model.thaw_parameter("f{0:.0f}:cos".format(freq))
			model.thaw_parameter("f{0:.0f}:sin".format(freq))
		else:
			model.thaw_parameter("f{0:.0f}:amp".format(freq))
			model.thaw_parameter("f{0:.0f}:phase".format(freq))
	if args.fit_offset:
		#model.set_parameter("offset:value", -100.)
		#model.set_parameter("offset:value", 0)
		model.thaw_parameter("offset:value")

	if initial is not None:
		model.set_parameter_vector(initial)
	# model.thaw_parameter("GM:ltscan")
	logging.debug("params: %s", model.get_parameter_dict())
	logging.debug("param names: %s", model.get_parameter_names())
	logging.debug("param vector: %s", model.get_parameter_vector())
	logging.debug("param bounds: %s", model.get_parameter_bounds())
	#logging.debug("model value: %s", model.get_value(no_ys))
	#logging.debug("default log likelihood: %s", model.log_likelihood(model.vector))

	# setup the Gaussian Process kernel
	kernel_base = (1e7 * args.scale)**2
	ksub = args.name_suffix

	solver = "basic"
	skwargs = {}
	if args.HODLR_Solver:
		solver = "HODLR"
		#skwargs = {"tol": 1e-3}

	if args.george:
		gpname, kernel = setup_george_kernel(kernls,
				kernel_base=kernel_base, fit_bias=args.fit_bias)
		gpmodel = george.GP(kernel, mean=model,
			white_noise=1.e-25, fit_white_noise=args.fit_white,
			solver=george_solvers[solver], **skwargs)
		# the george interface does not allow setting the bounds in
		# the kernel initialization so we prepare simple default bounds
		kernel_bounds = [(-0.3 * max_amp, 0.3 * max_amp)
				for _ in gpmodel.kernel.get_parameter_names()]
		bounds = gpmodel.mean.get_parameter_bounds() + kernel_bounds
	else:
		gpname, cel_terms = setup_celerite_terms(kernls,
				fit_bias=args.fit_bias, fit_white=args.fit_white)
		gpmodel = celerite.GP(cel_terms, mean=model,
			fit_white_noise=args.fit_white,
			fit_mean=True)
		bounds = gpmodel.get_parameter_bounds()
	gpmodel.compute(no_ys_train, no_errs_train)
	logging.debug("gpmodel params: %s", gpmodel.get_parameter_dict())
	logging.debug("gpmodel bounds: %s", bounds)
	logging.debug("initial log likelihood: %s", gpmodel.log_likelihood(no_dens_train))
	if isinstance(gpmodel, celerite.GP):
		logging.info("(GP) jitter: %s", gpmodel.kernel.jitter)
	model_name = "_".join(gpmodel.mean.get_parameter_names()).replace(':', '')
	gpmodel_name = model_name + gpname
	logging.info("GP model name: %s", gpmodel_name)

	pre_opt = False
	if args.optimize > 0:
		def gpmodel_mean(x, *p):
			gpmodel.set_parameter_vector(p)
			return gpmodel.mean.get_value(x)

		def gpmodel_res(x, *p):
			gpmodel.set_parameter_vector(p)
			return (gpmodel.mean.get_value(x) - no_dens_train) / no_errs_train

		def lpost(p, y, gp):
			gp.set_parameter_vector(p)
			return gp.log_likelihood(y, quiet=True) + gp.log_prior()

		def nlpost(p, y, gp):
			# flagged by the report: "The variable lpost does not seem to be
			# defined in case args.optimize > 0 is False. Are you sure this
			# can never be the case?"
			lp = lpost(p, y, gp)
			return -lp if np.isfinite(lp) else 1e25

		def grad_nlpost(p, y, gp):
			gp.set_parameter_vector(p)
			grad_ll = gp.grad_log_likelihood(y)
			if isinstance(grad_ll, tuple):
				# celerite
				return -grad_ll[1]
			# george
			return -grad_ll

		if args.optimize == 1:
			resop_gp = op.minimize(
				nlpost,
				gpmodel.get_parameter_vector(),
				args=(no_dens_train, gpmodel),
				bounds=bounds,
				# method="l-bfgs-b", options=dict(disp=True, maxcor=100, eps=1e-9, ftol=2e-15, gtol=1e-8))
				method="l-bfgs-b", jac=grad_nlpost)
				# method="tnc", options=dict(disp=True, maxiter=500, xtol=1e-12))
				# method="nelder-mead", options=dict(disp=True, maxfev=100000, fatol=1.49012e-8, xatol=1.49012e-8))
				# method="Powell", options=dict(ftol=1.49012e-08, xtol=1.49012e-08))
		if args.optimize == 2:
			resop_gp = op.differential_evolution(
				nlpost,
				bounds=bounds,
				args=(no_dens_train, gpmodel),
				popsize=2 * args.walkers, tol=0.01)
		if args.optimize == 3:
			resop_bh = op.basinhopping(
				nlpost,
				gpmodel.get_parameter_vector(),
				niter=200,
				minimizer_kwargs=dict(
					args=(no_dens_train, gpmodel),
					bounds=bounds,
					# method="tnc"))
					# method="l-bfgs-b", options=dict(maxcor=100)))
					method="l-bfgs-b", jac=grad_nlpost))
					# method="Nelder-Mead"))
					# method="BFGS"))
					# method="Powell", options=dict(ftol=1.49012e-08, xtol=1.49012e-08)))
			logging.debug("optimization result: %s", resop_bh)
			resop_gp = resop_bh.lowest_optimization_result
		if args.optimize == 4:
			resop_gp, cov_gp = op.curve_fit(
				gpmodel_mean,
				no_ys_train, no_dens_train, gpmodel.get_parameter_vector(),
				bounds=tuple(np.array(bounds).T),
				# method='lm',
				# absolute_sigma=True,
				sigma=no_errs_train)
			print(resop_gp, np.sqrt(np.diag(cov_gp)))
		# flagged by the report: "The variable resop_gp does not seem to be
		# defined in case args.optimize == 1 is False. Are you sure this can
		# never be the case?"
		logging.info("%s", resop_gp.message)
		logging.debug("optimization result: %s", resop_gp)
		logging.info("gpmodel dict: %s", gpmodel.get_parameter_dict())
		logging.info("log posterior trained: %s", lpost(gpmodel.get_parameter_vector(), no_dens_train, gpmodel))
		gpmodel.compute(no_ys_test, no_errs_test)
		logging.info("log posterior test: %s", lpost(gpmodel.get_parameter_vector(), no_dens_test, gpmodel))
		gpmodel.compute(no_ys, no_errs)
		logging.info("log posterior all: %s", lpost(gpmodel.get_parameter_vector(), no_dens, gpmodel))
		# cross check to make sure that the gpmodel parameter vector is really
		# set to the fitted parameters
		logging.info("opt. model vector: %s", resop_gp.x)
		gpmodel.compute(no_ys_train, no_errs_train)
		logging.debug("opt. log posterior trained 1: %s", lpost(resop_gp.x, no_dens_train, gpmodel))
		gpmodel.compute(no_ys_test, no_errs_test)
		logging.debug("opt. log posterior test 1: %s", lpost(resop_gp.x, no_dens_test, gpmodel))
		gpmodel.compute(no_ys, no_errs)
		logging.debug("opt. log posterior all 1: %s", lpost(resop_gp.x, no_dens, gpmodel))
		logging.debug("opt. model vector: %s", gpmodel.get_parameter_vector())
		gpmodel.compute(no_ys_train, no_errs_train)
		logging.debug("opt. log posterior trained 2: %s", lpost(gpmodel.get_parameter_vector(), no_dens_train, gpmodel))
		gpmodel.compute(no_ys_test, no_errs_test)
		logging.debug("opt. log posterior test 2: %s", lpost(gpmodel.get_parameter_vector(), no_dens_test, gpmodel))
		gpmodel.compute(no_ys, no_errs)
		logging.debug("opt. log posterior all 2: %s", lpost(gpmodel.get_parameter_vector(), no_dens, gpmodel))
		pre_opt = resop_gp.success
	try:
		logging.info("GM lt: %s", gpmodel.get_parameter("mean:GM:tau0"))
	except ValueError:
		pass
	logging.info("(GP) model: %s", gpmodel.kernel)
	if isinstance(gpmodel, celerite.GP):
		logging.info("(GP) jitter: %s", gpmodel.kernel.jitter)

	bestfit = gpmodel.get_parameter_vector()
	filename_base = ("NO_regress_fit_{0}_{1:.0f}_{2:.0f}_{{0}}_{3}"
					.format(gpmodel_name, lat * 10, alt, ksub))

	if args.mcmc:
		gpmodel.compute(no_ys_train, no_errs_train)
		samples, lnp = mcmc_sample_model(gpmodel,
				no_dens_train,
				beta=1.0,
				nwalkers=args.walkers, nburnin=args.burn_in,
				nprod=args.production, nthreads=args.threads,
				show_progress=args.progress,
				optimized=pre_opt, bounds=bounds, return_logpost=True)

		if args.train_fraction < 1. or args.test_fraction < 1.:
			logging.info("Statistics for the test samples")
			mcmc_statistics(gpmodel,
					no_ys_test, no_dens_test, no_errs_test,
					no_ys_train, no_dens_train, no_errs_train,
					samples, lnp,
			)
		logging.info("Statistics for all samples")
		mcmc_statistics(gpmodel,
				no_ys, no_dens, no_errs,
				no_ys_train, no_dens_train, no_errs_train,
				samples, lnp,
		)

		sampl_percs = np.percentile(samples, [2.5, 50, 97.5], axis=0)
		if args.plot_corner:
			import corner
			# Corner plot of the sampled parameters
			fig = corner.corner(samples,
					quantiles=[0.025, 0.5, 0.975],
					show_titles=True,
					labels=gpmodel.get_parameter_names(),
					truths=bestfit,
					hist_args=dict(normed=True))
			fig.savefig(filename_base.format("corner") + ".pdf", transparent=True)

		if args.save_samples:
			if args.samples_format in ["npz"]:
				# save the samples compressed to save space.
				np.savez_compressed(filename_base.format("sampls") + ".npz",
					samples=samples)
			if args.samples_format in ["nc", "netcdf4"]:
				save_samples_netcdf(filename_base.format("sampls") + ".nc",
					gpmodel, alt, lat, samples, scale=args.scale, compressed=True)
			if args.samples_format in ["h5", "hdf5"]:
				save_samples_netcdf(filename_base.format("sampls") + ".h5",
					gpmodel, alt, lat, samples, scale=args.scale, compressed=True)
		# MCMC finished here

	# set the model times and errors to use the full data set for plotting
	gpmodel.compute(no_ys, no_errs)
	if args.save_model:
		try:
			# python 2
			import cPickle as pickle
		except ImportError:
			# python 3
			import pickle
		# pickle and save the model
		with open(filename_base.format("model") + ".pkl", "wb") as f:
			pickle.dump((gpmodel), f, -1)

	if args.plot_samples and args.mcmc:
		plot_random_samples(gpmodel, no_ys, no_dens, no_errs,
				# flagged by the report: "The variable samples does not seem
				# to be defined in case args.mcmc is False."
				samples, args.scale,
				filename_base.format("sampls") + ".pdf",
				size=4, extra_years=[4, 2])

	if args.plot_median:
		plot_single_sample_and_residuals(gpmodel, no_ys, no_dens, no_errs,
				# flagged by the report: "The variable sampl_percs does not
				# seem to be defined in case args.mcmc is False."
				sampl_percs[1],
				filename_base.format("median") + ".pdf")
	if args.plot_residuals:
		plot_residual(gpmodel, no_ys, no_dens, no_errs,
				sampl_percs[1], args.scale,
				filename_base.format("medres") + ".pdf")
	if args.plot_maxlnp:
		plot_single_sample_and_residuals(gpmodel, no_ys, no_dens, no_errs,
				# flagged by the report: "The variable lnp does not seem to be
				# defined in case args.mcmc is False."
				samples[np.argmax(lnp)],
				filename_base.format("maxlnp") + ".pdf")
	if args.plot_maxlnpres:
		plot_residual(gpmodel, no_ys, no_dens, no_errs,
				samples[np.argmax(lnp)], args.scale,
				filename_base.format("mlpres") + ".pdf")

	labels = gpmodel.get_parameter_names()
	logging.info("param percentiles [2.5, 50, 97.5]:")
	for pc, label in zip(sampl_percs.T, labels):
		median = pc[1]
		pc_minus = median - pc[0]
		pc_plus = pc[2] - median
		logging.debug("%s: %s", label, pc)
		logging.info("%s: %.6f (- %.6f) (+ %.6f)", label,
				median, pc_minus, pc_plus)

	logging.info("Finished successfully.")


if __name__ == "__main__":
	main()
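Since sciapy.regress.__main__ is a package entry point, the module can be run with Python's -m switch. A minimal invocation, assuming the argparse-based parser imported from ._options provides the standard -h/--help flag:

	python -m sciapy.regress --help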