Metric      | Value
Conditions  | 3
Total Lines | 138
Code Lines  | 85
Lines       | 0
Ratio       | 0 %
Tests       | 71
CRAP Score  | 3
Changes     | 0
Small methods make your code easier to understand, especially when combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, that is usually a sign that the commented part should be extracted into a method of its own; the comment then makes a good starting point for naming the new method.
Commonly applied refactorings include Extract Method; if many parameters or temporary variables get in the way of extraction, refactorings such as Replace Temp with Query or Replace Method with Method Object (from Fowler's catalog) can help.
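A minimal sketch of Extract Method, turning an explanatory comment into a method name (the functions and names here are invented purely for illustration):

    def order_total(items):
        """Return the total price for a list of (price, quantity) pairs."""
        total = sum(price * quantity for price, quantity in items)
        # apply the bulk discount for large orders
        if total > 100:
            total *= 0.9
        return total

After extraction, the comment becomes the name and docstring of the new method:

    def apply_bulk_discount(total):
        """Apply the bulk discount for large orders."""
        return total * 0.9 if total > 100 else total

    def order_total(items):
        """Return the total price for a list of (price, quantity) pairs."""
        total = sum(price * quantity for price, quantity in items)
        return apply_bulk_discount(total)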
Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more, or different, data.
There are several approaches to avoiding long parameter lists, for example grouping related arguments into a parameter object or passing the whole object instead of its individual fields. The function below is a typical candidate; a parameter-object sketch follows after the excerpt.
# -*- coding: utf-8 -*-
# (excerpt; the module's import block is not shown in the report --
#  inferred from usage: `np` is numpy, `linalg` is numpy.linalg or
#  scipy.linalg, `logging` is the standard-library logger, and
#  `_log_prob` is a module-private Gaussian log-probability helper
#  defined elsewhere in the same file)

def mcmc_statistics(model, times, data, errs,
                    train_times, train_data, train_errs,
                    samples, lnp,
                    median=False):
    """Statistics for the (GP) model against the provided data

    Statistical information about the model and the sampled parameter
    distributions with respect to the provided data and its variance.

    Sends the calculated values to the logger, includes the mean
    standardized log loss as described in R&W, 2006, section 2.5, (2.34),
    and some slightly adapted $\\chi^2_{\\text{red}}$ and $R^2$ scores.

    Parameters
    ----------
    model : `celerite.GP`, `george.GP` or `CeleriteModelSet` instance
        The model instance whose parameter distribution was drawn.
    times : (M,) array_like
        The test coordinates to predict or evaluate the model on.
    data : (M,) array_like
        The test data to test the model against.
    errs : (M,) array_like
        The errors (variances) of the test data.
    train_times : (N,) array_like
        The coordinates on which the model was trained.
    train_data : (N,) array_like
        The data on which the model was trained.
    train_errs : (N,) array_like
        The errors (variances) of the training data.
    samples : (K, L) array_like
        The `K` MCMC samples of the `L` parameter distributions.
    lnp : (K,) array_like
        The posterior log probabilities of the `K` MCMC samples.
    median : bool, optional
        Whether to use the median of the sampled distributions or
        the maximum posterior sample (the default) to evaluate the
        statistics.

    Returns
    -------
    nothing
    """
    ndat = len(times)
    ndim = len(model.get_parameter_vector())
    mdim = len(model.mean.get_parameter_vector())
    samples_max_lp = np.max(lnp)
    if median:
        sample_pos = np.nanmedian(samples, axis=0)
    else:
        sample_pos = samples[np.argmax(lnp)]
    model.set_parameter_vector(sample_pos)
    # calculate the GP predicted values and covariance
    gppred, gpcov = model.predict(train_data, t=times, return_cov=True)
    # the predictive covariance should include the data variance
    gpcov[np.diag_indices_from(gpcov)] += errs**2
    # residuals
    resid_mod = model.mean.get_value(times) - data  # GP mean model
    resid_gp = gppred - data  # GP prediction
    resid_triv = np.nanmean(train_data) - data  # trivial model
    _const = ndat * np.log(2.0 * np.pi)
    test_logpred = -0.5 * (resid_gp.dot(linalg.solve(gpcov, resid_gp))
                           + np.trace(np.log(gpcov))
                           + _const)
    # MSLL -- mean standardized log loss
    # as described in R&W, 2006, section 2.5, (2.34)
    var_mod = np.nanvar(resid_mod, ddof=mdim)   # mean model variance
    var_gp = np.nanvar(resid_gp, ddof=ndim)     # gp model variance
    var_triv = np.nanvar(train_data, ddof=1)    # trivial model variance
    logpred_mod = _log_prob(resid_mod, var_mod)
    logpred_gp = _log_prob(resid_gp, var_gp)
    logpred_triv = _log_prob(resid_triv, var_triv)
    logging.info("MSLL mean: %s", np.nanmean(-logpred_mod + logpred_triv))
    logging.info("MSLL gp: %s", np.nanmean(-logpred_gp + logpred_triv))
    # predictive variances
    logpred_mod = _log_prob(resid_mod, var_mod + errs**2)
    logpred_gp = _log_prob(resid_gp, var_gp + errs**2)
    logpred_triv = _log_prob(resid_triv, var_triv + errs**2)
    logging.info("pred MSLL mean: %s", np.nanmean(-logpred_mod + logpred_triv))
    logging.info("pred MSLL gp: %s", np.nanmean(-logpred_gp + logpred_triv))
    # cost values
    cost_mod = np.sum(resid_mod**2)
    cost_triv = np.sum(resid_triv**2)
    cost_gp = np.sum(resid_gp**2)
    # chi^2 (variance corrected costs)
    chisq_mod_ye = np.sum((resid_mod / errs)**2)
    chisq_triv = np.sum((resid_triv / errs)**2)
    chisq_gpcov = resid_mod.dot(linalg.solve(gpcov, resid_mod))
    # adjust for degrees of freedom
    cost_gp_dof = cost_gp / (ndat - ndim)
    cost_mod_dof = cost_mod / (ndat - mdim)
    cost_triv_dof = cost_triv / (ndat - 1)
    # reduced chi^2
    chisq_red_mod_ye = chisq_mod_ye / (ndat - mdim)
    chisq_red_triv = chisq_triv / (ndat - 1)
    chisq_red_gpcov = chisq_gpcov / (ndat - ndim)
    # "generalized" R^2
    logp_triv1 = np.sum(_log_prob(resid_triv, errs**2))
    logp_triv2 = np.sum(_log_prob(resid_triv, var_triv))
    logp_triv3 = np.sum(_log_prob(resid_triv, var_triv + errs**2))
    log_lambda1 = test_logpred - logp_triv1
    log_lambda2 = test_logpred - logp_triv2
    log_lambda3 = test_logpred - logp_triv3
    gen_rsq1a = 1 - np.exp(-2 * log_lambda1 / ndat)
    gen_rsq1b = 1 - np.exp(-2 * log_lambda1 / (ndat - ndim))
    gen_rsq2a = 1 - np.exp(-2 * log_lambda2 / ndat)
    gen_rsq2b = 1 - np.exp(-2 * log_lambda2 / (ndat - ndim))
    gen_rsq3a = 1 - np.exp(-2 * log_lambda3 / ndat)
    gen_rsq3b = 1 - np.exp(-2 * log_lambda3 / (ndat - ndim))
    # sent to the logger
    logging.info("train max logpost: %s", samples_max_lp)
    logging.info("test log_pred: %s", test_logpred)
    logging.info("1a cost mean model: %s, dof adj: %s", cost_mod, cost_mod_dof)
    logging.debug("1c cost gp predict: %s, dof adj: %s", cost_gp, cost_gp_dof)
    logging.debug("1b cost triv model: %s, dof adj: %s", cost_triv, cost_triv_dof)
    logging.info("1d var resid mean model: %s, gp model: %s, triv: %s",
                 var_mod, var_gp, var_triv)
    logging.info("2a adjR2 mean model: %s, adjR2 gp predict: %s",
                 1 - cost_mod_dof / cost_triv_dof, 1 - cost_gp_dof / cost_triv_dof)
    logging.info("2b red chi^2 mod: %s / triv: %s = %s",
                 chisq_red_mod_ye, chisq_red_triv, chisq_red_mod_ye / chisq_red_triv)
    logging.info("2c red chi^2 mod (gp cov): %s / triv: %s = %s",
                 chisq_red_gpcov, chisq_red_triv, chisq_red_gpcov / chisq_red_triv)
    logging.info("3a stand. red chi^2: %s", chisq_red_gpcov / chisq_red_triv)
    logging.info("3b 1 - stand. red chi^2: %s",
                 1 - chisq_red_gpcov / chisq_red_triv)
    logging.info("5a generalized R^2: 1a: %s, 1b: %s",
                 gen_rsq1a, gen_rsq1b)
    logging.info("5b generalized R^2: 2a: %s, 2b: %s",
                 gen_rsq2a, gen_rsq2b)
    logging.info("5c generalized R^2: 3a: %s, 3b: %s",
                 gen_rsq3a, gen_rsq3b)
    try:
        # celerite
        logdet = model.solver.log_determinant()
    except TypeError:
        # george
        logdet = model.solver.log_determinant
    logging.debug("5 logdet: %s, const 2: %s", logdet, _const)
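As a side note on what the excerpt logs: the mean standardized log loss named in the docstring follows R&W (2006), eq. (2.34). As I read it, each test point contributes its negative log predictive probability, standardized by subtracting the loss of a trivial Gaussian model fitted to the training data, and the result is averaged:

$$-\log p(y_* \mid \mathcal{D}, x_*) = \tfrac{1}{2}\log\!\big(2\pi\sigma_*^2\big) + \frac{\big(y_* - \bar f(x_*)\big)^2}{2\sigma_*^2},
\qquad
\mathrm{MSLL} = \frac{1}{n}\sum_{i=1}^{n}\Big[-\log p_{\mathrm{model}}(y_{*i}) + \log p_{\mathrm{triv}}(y_{*i})\Big],$$

which is what the `np.nanmean(-logpred_gp + logpred_triv)` lines compute.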
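Coming back to the long parameter list: one of the approaches mentioned above, introducing a parameter object, could be applied to this signature. The sketch below is not part of the original module; the `Dataset` name and the adapted signature are invented for illustration, grouping the coordinate, value, and variance arrays that always travel together.

    from dataclasses import dataclass
    import numpy as np

    @dataclass
    class Dataset:
        """Hypothetical container for arrays that always travel together."""
        times: np.ndarray
        data: np.ndarray
        errs: np.ndarray

    # The nine-argument signature from the excerpt could then shrink to, e.g.:
    # def mcmc_statistics(model, test: Dataset, train: Dataset, samples, lnp, median=False):
    #     ...

    # Building such an object for a (made-up) test set:
    test_set = Dataset(times=np.linspace(0.0, 1.0, 50),
                       data=np.zeros(50),
                       errs=np.full(50, 0.1))

Besides shortening the signature, the grouped arrays can no longer get out of step with each other when a caller passes, say, test data with training errors.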