Completed
Push — master ( 2074c3...e65310 )
by Andy
21s
created

_remove_forbidden_op_kwds()   B

Complexity

Conditions 3

Size

Total Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
dl 0
loc 27
rs 8.8571
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""
5
Fitting functions for use in The Cannon.
6
"""
7
8
from __future__ import (division, print_function, absolute_import,
9
                        unicode_literals)
10
11
__all__ = ["fit_spectrum", "fit_pixel_fixed_scatter", "fit_theta_by_linalg",
12
    "chi_sq", "L1Norm_variation"]
13
14
import logging
15
import numpy as np
16
import scipy.optimize as op
17
from time import time
18
19
logger = logging.getLogger(__name__)
20
21
22
def fit_spectrum(flux, ivar, initial_labels, vectorizer, theta, s2, fiducials,
23
    scales, dispersion=None, **kwargs):
24
    """
25
    Fit a single spectrum by least-squared fitting.
26
27
    :param flux:
28
        The normalized flux values.
29
30
    :param ivar:
31
        The inverse variance array for the normalized fluxes.
32
33
    :param initial_labels:
34
        The point(s) to initialize optimization from.
35
36
    :param vectorizer:
37
        The vectorizer to use when fitting the data.
38
39
    :param theta:
40
        The theta coefficients (spectral derivatives) of the trained model.
41
42
    :param s2:
43
        The pixel scatter (s^2) array for each pixel.
44
45
    :param dispersion: [optional]
46
        The dispersion (e.g., wavelength) points for the normalized fluxes.
47
48
    :returns:
49
        A three-length tuple containing: the optimized labels, the covariance
50
        matrix, and metadata associated with the optimization.
51
    """
52
53
    adjusted_ivar = ivar/(1. + ivar * s2)
54
55
    # Exclude non-finite points (e.g., points with zero inverse variance
56
    # or non-finite flux values, but the latter shouldn't exist anyway).
57
    use = np.isfinite(flux * adjusted_ivar) * (adjusted_ivar > 0)
58
    L = len(vectorizer.label_names)
59
60
    if not np.any(use):
61
        logger.warn("No information in spectrum!")
62
        return (np.nan * np.ones(L), None, {
63
                "fail_message": "Pixels contained no information"})
64
65
    # Splice the arrays we will use most.
66
    flux = flux[use]
67
    weights = np.sqrt(adjusted_ivar[use]) # --> 1.0 / sigma
68
    use_theta = theta[use]
69
70
    initial_labels = np.atleast_2d(initial_labels)
71
72
    # Check the vectorizer whether it has a derivative built in.
73
    Dfun = kwargs.pop("Dfun", True)
74
    if Dfun not in (None, False):
75
        try:
76
            vectorizer.get_label_vector_derivative(initial_labels[0])
77
78
        except NotImplementedError:
79
            Dfun = None
80
            logger.warn("No label vector derivatives available in {}!".format(
81
                vectorizer))
82
83
        except:
84
            logger.exception("Exception raised when trying to calculate the "\
85
                             "label vector derivative at the fiducial values:")
86
            raise
87
88
        else:
89
            # Use the label vector derivative.
90
            Dfun = lambda parameters: weights * np.dot(use_theta,
91
                vectorizer.get_label_vector_derivative(parameters)).T
92
93
    else:
94
        Dfun = None
95
96
    def func(parameters):
97
        return np.dot(use_theta, vectorizer(parameters))[:, 0]
98
99
    def residuals(parameters):
100
        return weights * (func(parameters) - flux)
101
102
    kwds = {
103
        "func": residuals,
104
        "Dfun": Dfun,
105
        "col_deriv": True,
106
107
        # These get passed through to leastsq:
108
        "ftol": 7./3 - 4./3 - 1, # Machine precision.
109
        "xtol": 7./3 - 4./3 - 1, # Machine precision.
110
        "gtol": 0.0,
111
        "maxfev": 100000, # MAGIC
112
        "epsfcn": None,
113
        "factor": 1.0,
114
    }
115
116
    # Only update the keywords with things that op.curve_fit/op.leastsq expects.
117
    for key in set(kwargs).intersection(kwds):
118
        kwds[key] = kwargs[key]
119
120
    results = []
121
    for x0 in initial_labels:
122
123
        try:
124
            op_labels, cov, meta, mesg, ier = op.leastsq(
125
                x0=(x0 - fiducials)/scales, full_output=True, **kwds)
126
127
        except RuntimeError:
128
            logger.exception("Exception in fitting from {}".format(x0))
129
            continue
130
131
        meta.update(
132
            dict(x0=x0, chi_sq=np.sum(meta["fvec"]**2), ier=ier, mesg=mesg))
133
        results.append((op_labels, cov, meta))
134
135
    if len(results) == 0:
136
        logger.warn("No results found!")
137
        return (np.nan * np.ones(L), None, dict(fail_message="No results found"))
138
139
    best_result_index = np.nanargmin([m["chi_sq"] for (o, c, m) in results])
140
    op_labels, cov, meta = results[best_result_index]
141
142
    # De-scale the optimized labels.
143
    meta["model_flux"] = func(op_labels)
144
    op_labels = op_labels * scales + fiducials
145
146
    if np.allclose(op_labels, meta["x0"]):
147
        logger.warn(
148
            "Discarding optimized result because it is exactly the same as the "
149
            "initial value!")
150
151
        # We are in dire straits. We should not trust the result.
152
        op_labels *= np.nan
153
        meta["fail_message"] = "Optimized result same as initial value."
154
155
    if cov is None:
156
        cov = np.ones((len(op_labels), len(op_labels)))
157
158
    if not np.any(np.isfinite(cov)):
159
        logger.warn("Non-finite covariance matrix returned!")
160
161
    # Save additional information.
162
    meta.update({
163
        "method": "leastsq",
164
        "label_names": vectorizer.label_names,
165
        "best_result_index": best_result_index,
166
        "derivatives_used": Dfun is not None,
167
        "snr": np.nanmedian(flux * weights),
168
        "r_chi_sq": meta["chi_sq"]/(use.sum() - L - 1),
169
    })
170
    for key in ("ftol", "xtol", "gtol", "maxfev", "factor", "epsfcn"):
171
        meta[key] = kwds[key]
172
173
    return (op_labels, cov, meta)
174
175
176
177
def fit_theta_by_linalg(flux, ivar, s2, design_matrix):
178
    """
179
    Fit theta coefficients to a set of normalized fluxes for a single pixel.
180
181
    :param flux:
182
        The normalized fluxes for a single pixel (across many stars).
183
184
    :param ivar:
185
        The inverse variance of the normalized flux values for a single pixel
186
        across many stars.
187
188
    :param s2:
189
        The noise residual (squared scatter term) to adopt in the pixel.
190
191
    :param design_matrix:
192
        The model design matrix.
193
194
    :returns:
195
        The label vector coefficients for the pixel, and the inverse variance
196
        matrix.
197
    """
198
199
    adjusted_ivar = ivar/(1. + ivar * s2)
200
    CiA = design_matrix * np.tile(adjusted_ivar, (design_matrix.shape[1], 1)).T
201
    try:
202
        ATCiAinv = np.linalg.inv(np.dot(design_matrix.T, CiA))
203
    except np.linalg.linalg.LinAlgError:
204
        N = design_matrix.shape[1]
205
        return (np.hstack([1, np.zeros(N - 1)]), np.inf * np.eye(N))
206
207
    ATY = np.dot(design_matrix.T, flux * adjusted_ivar)
208
    theta = np.dot(ATCiAinv, ATY)
209
210
    return (theta, ATCiAinv)
211
212
213
214
# TODO: This logic should probably go somewhere else.
215
216
217
def chi_sq(theta, design_matrix, flux, ivar, axis=None, gradient=True):
218
    """
219
    Calculate the chi-squared difference between the spectral model and flux.
220
221
    :param theta:
222
        The theta coefficients.
223
224
    :param design_matrix:
225
        The model design matrix.
226
227
    :param flux:
228
        The normalized flux values.
229
230
    :param ivar:
231
        The inverse variances of the normalized flux values.
232
233
    :param axis: [optional]
234
        The axis to sum the chi-squared values across.
235
236
    :param gradient: [optional]
237
        Return the chi-squared value and its derivatives (Jacobian).
238
239
    :returns:
240
        The chi-squared difference between the spectral model and flux, and
241
        optionally, the Jacobian.
242
    """
243
    residuals = np.dot(theta, design_matrix.T) - flux
244
245
    ivar_residuals = ivar * residuals
246
    f = np.sum(ivar_residuals * residuals, axis=axis)
247
    if not gradient:
248
        return f
249
250
    g = 2.0 * np.dot(design_matrix.T, ivar_residuals)
251
    return (f, g)
252
253
254
def L1Norm_variation(theta):
255
    """
256
    Return the L1 norm of theta (except the first entry) and its derivative.
257
258
    :param theta:
259
        An array of finite values.
260
261
    :returns:
262
        A two-length tuple containing: the L1 norm of theta (except the first
263
        entry), and the derivative of the L1 norm of theta.
264
    """
265
266
    return (np.sum(np.abs(theta[1:])), np.hstack([0.0, np.sign(theta[1:])]))
267
268
269
def _pixel_objective_function_fixed_scatter(theta, design_matrix, flux, ivar,
270
    regularization, gradient=True):
271
    """
272
    The objective function for a single regularized pixel with fixed scatter.
273
274
    :param theta:
275
        The spectral coefficients.
276
277
    :param normalized_flux:
278
        The normalized flux values for a single pixel across many stars.
279
280
    :param adjusted_ivar:
281
        The adjusted inverse variance of the normalized flux values for a single
282
        pixel across many stars. This adjusted inverse variance array should
283
        already have the scatter included.
284
285
    :param regularization:
286
        The regularization term to scale the L1 norm of theta with.
287
288
    :param design_matrix:
289
        The design matrix for the model.
290
291
    :param gradient: [optional]
292
        Also return the analytic derivative of the objective function.
293
    """
294
295
    if gradient:
296
        csq, d_csq = chi_sq(theta, design_matrix, flux, ivar, gradient=True)
297
        L1, d_L1 = L1Norm_variation(theta)
298
299
        f = csq + regularization * L1
300
        g = d_csq + regularization * d_L1
301
302
        return (f, g)
303
304
    else:
305
        csq = chi_sq(theta, design_matrix, flux, ivar, gradient=False)
306
        L1, d_L1 = L1Norm_variation(theta)
307
308
        return csq + regularization * L1
309
310
311
def _scatter_objective_function(scatter, residuals_squared, ivar):
312
    adjusted_ivar = ivar/(1.0 + ivar * scatter**2)
313
    chi_sq = residuals_squared * adjusted_ivar
314
    return (np.mean(chi_sq) - 1.0)**2
315
316
317
def _remove_forbidden_op_kwds(op_method, op_kwds):
318
    """
319
    Remove forbidden optimization keywords.
320
321
    :param op_method:
322
        The optimization algorithm to use.
323
324
    :param op_kwds:
325
        Optimization keywords.
326
327
    :returns:
328
        `None`. The dictionary of `op_kwds` will be updated.
329
    """
330
    all_allowed_keys = dict(
331
        l_bfgs_b=("bounds", "m", "factr", "pgtol", "epsilon", "iprint", 
332
            "maxfun", "maxiter", "disp", "callback", "maxls"),
333
        powell=("xtol", "ftol", "maxiter", "maxfun", "full_output", "disp",
334
            "retall", "callback", "initial_simplex"))
335
336
    forbidden_keys = set(op_kwds).difference(all_allowed_keys[op_method])
337
    if forbidden_keys:
338
        logger.warn("Ignoring forbidden optimization keywords for {}: {}"\
339
            .format(op_method, ", ".join(forbidden_keys)))
340
        for key in forbidden_keys:
341
            del op_kwds[key]
342
343
    return None
344
            
345
346
347
def fit_pixel_fixed_scatter(flux, ivar, initial_thetas, design_matrix,
348
    regularization, censoring_mask, **kwargs):
349
    """
350
    Fit theta coefficients and noise residual for a single pixel, using
351
    an initially fixed scatter value.
352
353
    :param flux:
354
        The normalized flux values.
355
356
    :param ivar:
357
        The inverse variance array for the normalized fluxes.
358
359
    :param initial_thetas:
360
        A list of initial theta values to start from, and their source. For
361
        example: `[(theta_0, "guess"), (theta_1, "old_theta")]
362
363
    :param design_matrix:
364
        The model design matrix.
365
366
    :param regularization:
367
        The regularization strength to apply during optimization (Lambda).
368
369
    :param censoring_mask:
370
        A per-label censoring mask for each pixel.
371
372
    :keyword op_method:
373
        The optimization method to use. Valid options are: `l_bfgs_b`, `powell`.
374
375
    :keyword op_kwds:
376
        A dictionary of arguments that will be provided to the optimizer.
377
378
    :returns:
379
        The optimized theta coefficients, the noise residual `s2`, and
380
        metadata related to the optimization process.
381
    """
382
383
    if np.sum(ivar) < 1.0 * ivar.size: # MAGIC
384
        metadata = dict(message="No pixel information.", op_time=0.0)
385
        fiducial = np.hstack([1.0, np.zeros(design_matrix.shape[1] - 1)])
386
        return (fiducial, np.inf, metadata) # MAGIC
387
388
    # Determine if any theta coefficients will be censored.
389
    censored_theta = ~np.any(np.isfinite(design_matrix), axis=0)
390
    # Make the design matrix safe to use.
391
    design_matrix[:, censored_theta] = 0
392
393
    feval = []
394
    for initial_theta, initial_theta_source in initial_thetas:
395
        feval.append(_pixel_objective_function_fixed_scatter(
396
            initial_theta, design_matrix, flux, ivar, regularization, False))
397
398
    initial_theta, initial_theta_source = initial_thetas[np.nanargmin(feval)]
399
400
    base_op_kwds = dict(x0=initial_theta,
401
        args=(design_matrix, flux, ivar, regularization),
402
        disp=False, maxfun=np.inf, maxiter=np.inf)
403
404
    if any(censored_theta):
405
        # If the initial_theta is the same size as the censored_mask, but different
406
        # to the design_matrix, then we need to censor the initial theta so that we
407
        # don't bother solving for those parameters.
408
        base_op_kwds["x0"] = np.array(base_op_kwds["x0"])[~censored_theta]
409
        base_op_kwds["args"] = (design_matrix[:, ~censored_theta], flux, ivar,
410
            regularization)
411
412
    # Allow either l_bfgs_b or powell
413
    t_init = time()
414
    default_op_method = "l_bfgs_b"
415
    op_method = kwargs.get("op_method", default_op_method) or default_op_method
416
    op_method = op_method.lower()
417
418
    while True:
419
        if op_method == "l_bfgs_b":
420
            op_kwds = base_op_kwds.copy()
421
            op_kwds.update(m=design_matrix.shape[1], factr=10.0, pgtol=1e-6)
422
            op_kwds.update((kwargs.get("op_kwds", {}) or {}))
423
424
            # If op_bounds are given and we are censoring some theta terms, then we
425
            # will need to adjust which op_bounds we provide.
426
            if "bounds" in op_kwds and any(censored_theta):
427
                op_kwds["bounds"] = [b for b, is_censored in \
428
                    zip(op_kwds["bounds"], censored_theta) if not is_censored]
429
430
            # Only include allowable keywords.
431
            _remove_forbidden_op_kwds(op_method, op_kwds)
432
433
            op_params, fopt, metadata = op.fmin_l_bfgs_b(
434
                _pixel_objective_function_fixed_scatter,
435
                fprime=None, approx_grad=None, **op_kwds)
436
437
            metadata.update(dict(fopt=fopt))
438
439
            warnflag = metadata.get("warnflag", -1)
440
            if warnflag > 0:
441
                reason = "too many function evaluations or too many iterations" \
442
                         if warnflag == 1 else metadata["task"]
443
                logger.warn("Optimization warning (l_bfgs_b): {}".format(reason))
444
445
                # Do optimization again.
446
                op_method = "powell" 
447
                base_op_kwds.update(x0=op_params)
448
449
            else:
450
                break
451
452
        elif op_method == "powell":
453
            op_kwds = base_op_kwds.copy()
454
            op_kwds.update(xtol=1e-6, ftol=1e-6)
455
            op_kwds.update((kwargs.get("op_kwds", {}) or {}))
456
457
            # Only include allowable keywords.
458
            _remove_forbidden_op_kwds(op_method, op_kwds)
459
460
            # Set 'False' in args so that we don't return the gradient, 
461
            # because fmin doesn't want it.
462
            args = list(op_kwds["args"])
463
            args.append(False)
464
            op_kwds["args"] = tuple(args)
465
466
            t_init = time()
467
468
            op_params, fopt, direc, n_iter, n_funcs, warnflag = op.fmin_powell(
469
                _pixel_objective_function_fixed_scatter, 
470
                full_output=True, **op_kwds)
471
472
            metadata = dict(fopt=fopt, direc=direc, n_iter=n_iter, 
473
                n_funcs=n_funcs, warnflag=warnflag)
474
            break
475
476
        else:
477
            raise ValueError("unknown optimization method '{}' -- "
478
                             "powell or l_bfgs_b are available".format(op_method))
479
480
    # Additional metadata common to both optimizers.
481
    metadata.update(dict(op_method=op_method, op_time=time() - t_init,
482
        initial_theta=initial_theta, initial_theta_source=initial_theta_source))
483
484
    # De-censor the optimized parameters.
485
    if any(censored_theta):
486
        theta = np.zeros(censored_theta.size)
487
        theta[~censored_theta] = op_params
488
489
    else:
490
        theta = op_params
491
492
    # Fit the scatter.
493
    residuals_squared = (flux - np.dot(theta, design_matrix.T))**2
494
    scatter = op.fmin(_scatter_objective_function, 0.0,
495
        args=(residuals_squared, ivar), disp=False)
496
497
    return (theta, scatter**2, metadata)
498