_remove_forbidden_op_kwds() - Code Metrics - Inspection of "prevent bad keywords getting into optimization, et..." - andycasey/AnniesLasso - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 2074c3...e65310 )

by Andy

created 2018-02-28 22:47 UTC

_remove_forbidden_op_kwds() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	3
c	1
b	0
f	0
dl	0
loc	27
rs	8.8571

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Fitting functions for use in The Cannon.
"""

from __future__ import (division, print_function, absolute_import,
                        unicode_literals)

__all__ = ["fit_spectrum", "fit_pixel_fixed_scatter", "fit_theta_by_linalg",
    "chi_sq", "L1Norm_variation"]

import logging
import numpy as np
import scipy.optimize as op
from time import time

logger = logging.getLogger(__name__)


def fit_spectrum(flux, ivar, initial_labels, vectorizer, theta, s2, fiducials,
    scales, dispersion=None, **kwargs):
    """
    Fit a single spectrum by least-squared fitting.

    :param flux:
        The normalized flux values.

    :param ivar:
        The inverse variance array for the normalized fluxes.

    :param initial_labels:
        The point(s) to initialize optimization from.

    :param vectorizer:
        The vectorizer to use when fitting the data.

    :param theta:
        The theta coefficients (spectral derivatives) of the trained model.

    :param s2:
        The pixel scatter (s^2) array for each pixel.

    :param dispersion: [optional]
        The dispersion (e.g., wavelength) points for the normalized fluxes.

    :returns:
        A three-length tuple containing: the optimized labels, the covariance
        matrix, and metadata associated with the optimization.
    """

    adjusted_ivar = ivar/(1. + ivar * s2)

    # Exclude non-finite points (e.g., points with zero inverse variance
    # or non-finite flux values, but the latter shouldn't exist anyway).
    use = np.isfinite(flux * adjusted_ivar) * (adjusted_ivar > 0)
    L = len(vectorizer.label_names)

    if not np.any(use):
        logger.warn("No information in spectrum!")
        return (np.nan * np.ones(L), None, {
                "fail_message": "Pixels contained no information"})

    # Splice the arrays we will use most.
    flux = flux[use]
    weights = np.sqrt(adjusted_ivar[use]) # --> 1.0 / sigma
    use_theta = theta[use]

    initial_labels = np.atleast_2d(initial_labels)

    # Check the vectorizer whether it has a derivative built in.
    Dfun = kwargs.pop("Dfun", True)
    if Dfun not in (None, False):
        try:
            vectorizer.get_label_vector_derivative(initial_labels[0])

        except NotImplementedError:
            Dfun = None
            logger.warn("No label vector derivatives available in {}!".format(
                vectorizer))

        except:
            logger.exception("Exception raised when trying to calculate the "\
                             "label vector derivative at the fiducial values:")
            raise

        else:
            # Use the label vector derivative.
            Dfun = lambda parameters: weights * np.dot(use_theta,
                vectorizer.get_label_vector_derivative(parameters)).T

    else:
        Dfun = None

    def func(parameters):
        return np.dot(use_theta, vectorizer(parameters))[:, 0]

    def residuals(parameters):
        return weights * (func(parameters) - flux)

    kwds = {
        "func": residuals,
        "Dfun": Dfun,
        "col_deriv": True,

        # These get passed through to leastsq:
        "ftol": 7./3 - 4./3 - 1, # Machine precision.
        "xtol": 7./3 - 4./3 - 1, # Machine precision.
        "gtol": 0.0,
        "maxfev": 100000, # MAGIC
        "epsfcn": None,
        "factor": 1.0,
    }

    # Only update the keywords with things that op.curve_fit/op.leastsq expects.
    for key in set(kwargs).intersection(kwds):
        kwds[key] = kwargs[key]

    results = []
    for x0 in initial_labels:

        try:
            op_labels, cov, meta, mesg, ier = op.leastsq(
                x0=(x0 - fiducials)/scales, full_output=True, **kwds)

        except RuntimeError:
            logger.exception("Exception in fitting from {}".format(x0))
            continue

        meta.update(
            dict(x0=x0, chi_sq=np.sum(meta["fvec"]**2), ier=ier, mesg=mesg))
        results.append((op_labels, cov, meta))

    if len(results) == 0:
        logger.warn("No results found!")
        return (np.nan * np.ones(L), None, dict(fail_message="No results found"))

    best_result_index = np.nanargmin([m["chi_sq"] for (o, c, m) in results])
    op_labels, cov, meta = results[best_result_index]

    # De-scale the optimized labels.
    meta["model_flux"] = func(op_labels)
    op_labels = op_labels * scales + fiducials

    if np.allclose(op_labels, meta["x0"]):
        logger.warn(
            "Discarding optimized result because it is exactly the same as the "
            "initial value!")

        # We are in dire straits. We should not trust the result.
        op_labels *= np.nan
        meta["fail_message"] = "Optimized result same as initial value."

    if cov is None:
        cov = np.ones((len(op_labels), len(op_labels)))

    if not np.any(np.isfinite(cov)):
        logger.warn("Non-finite covariance matrix returned!")

    # Save additional information.
    meta.update({
        "method": "leastsq",
        "label_names": vectorizer.label_names,
        "best_result_index": best_result_index,
        "derivatives_used": Dfun is not None,
        "snr": np.nanmedian(flux * weights),
        "r_chi_sq": meta["chi_sq"]/(use.sum() - L - 1),
    })
    for key in ("ftol", "xtol", "gtol", "maxfev", "factor", "epsfcn"):
        meta[key] = kwds[key]

    return (op_labels, cov, meta)



def fit_theta_by_linalg(flux, ivar, s2, design_matrix):
    """
    Fit theta coefficients to a set of normalized fluxes for a single pixel.

    :param flux:
        The normalized fluxes for a single pixel (across many stars).

    :param ivar:
        The inverse variance of the normalized flux values for a single pixel
        across many stars.

    :param s2:
        The noise residual (squared scatter term) to adopt in the pixel.

    :param design_matrix:
        The model design matrix.

    :returns:
        The label vector coefficients for the pixel, and the inverse variance
        matrix.
    """

    adjusted_ivar = ivar/(1. + ivar * s2)
    CiA = design_matrix * np.tile(adjusted_ivar, (design_matrix.shape[1], 1)).T
    try:
        ATCiAinv = np.linalg.inv(np.dot(design_matrix.T, CiA))
    except np.linalg.linalg.LinAlgError:
        N = design_matrix.shape[1]
        return (np.hstack([1, np.zeros(N - 1)]), np.inf * np.eye(N))

    ATY = np.dot(design_matrix.T, flux * adjusted_ivar)
    theta = np.dot(ATCiAinv, ATY)

    return (theta, ATCiAinv)



# TODO: This logic should probably go somewhere else.


def chi_sq(theta, design_matrix, flux, ivar, axis=None, gradient=True):
    """
    Calculate the chi-squared difference between the spectral model and flux.

    :param theta:
        The theta coefficients.

    :param design_matrix:
        The model design matrix.

    :param flux:
        The normalized flux values.

    :param ivar:
        The inverse variances of the normalized flux values.

    :param axis: [optional]
        The axis to sum the chi-squared values across.

    :param gradient: [optional]
        Return the chi-squared value and its derivatives (Jacobian).

    :returns:
        The chi-squared difference between the spectral model and flux, and
        optionally, the Jacobian.
    """
    residuals = np.dot(theta, design_matrix.T) - flux

    ivar_residuals = ivar * residuals
    f = np.sum(ivar_residuals * residuals, axis=axis)
    if not gradient:
        return f

    g = 2.0 * np.dot(design_matrix.T, ivar_residuals)
    return (f, g)


def L1Norm_variation(theta):
    """
    Return the L1 norm of theta (except the first entry) and its derivative.

    :param theta:
        An array of finite values.

    :returns:
        A two-length tuple containing: the L1 norm of theta (except the first
        entry), and the derivative of the L1 norm of theta.
    """

    return (np.sum(np.abs(theta[1:])), np.hstack([0.0, np.sign(theta[1:])]))


def _pixel_objective_function_fixed_scatter(theta, design_matrix, flux, ivar,
    regularization, gradient=True):
    """
    The objective function for a single regularized pixel with fixed scatter.

    :param theta:
        The spectral coefficients.

    :param normalized_flux:
        The normalized flux values for a single pixel across many stars.

    :param adjusted_ivar:
        The adjusted inverse variance of the normalized flux values for a single
        pixel across many stars. This adjusted inverse variance array should
        already have the scatter included.

    :param regularization:
        The regularization term to scale the L1 norm of theta with.

    :param design_matrix:
        The design matrix for the model.

    :param gradient: [optional]
        Also return the analytic derivative of the objective function.
    """

    if gradient:
        csq, d_csq = chi_sq(theta, design_matrix, flux, ivar, gradient=True)
        L1, d_L1 = L1Norm_variation(theta)

        f = csq + regularization * L1
        g = d_csq + regularization * d_L1

        return (f, g)

    else:
        csq = chi_sq(theta, design_matrix, flux, ivar, gradient=False)
        L1, d_L1 = L1Norm_variation(theta)

        return csq + regularization * L1


def _scatter_objective_function(scatter, residuals_squared, ivar):
    adjusted_ivar = ivar/(1.0 + ivar * scatter**2)
    chi_sq = residuals_squared * adjusted_ivar
    return (np.mean(chi_sq) - 1.0)**2


def _remove_forbidden_op_kwds(op_method, op_kwds):
    """
    Remove forbidden optimization keywords.

    :param op_method:
        The optimization algorithm to use.

    :param op_kwds:
        Optimization keywords.

    :returns:
        `None`. The dictionary of `op_kwds` will be updated.
    """
    all_allowed_keys = dict(
        l_bfgs_b=("bounds", "m", "factr", "pgtol", "epsilon", "iprint", 
            "maxfun", "maxiter", "disp", "callback", "maxls"),
        powell=("xtol", "ftol", "maxiter", "maxfun", "full_output", "disp",
            "retall", "callback", "initial_simplex"))

    forbidden_keys = set(op_kwds).difference(all_allowed_keys[op_method])
    if forbidden_keys:
        logger.warn("Ignoring forbidden optimization keywords for {}: {}"\
            .format(op_method, ", ".join(forbidden_keys)))
        for key in forbidden_keys:
            del op_kwds[key]

    return None
            


def fit_pixel_fixed_scatter(flux, ivar, initial_thetas, design_matrix,
    regularization, censoring_mask, **kwargs):
    """
    Fit theta coefficients and noise residual for a single pixel, using
    an initially fixed scatter value.

    :param flux:
        The normalized flux values.

    :param ivar:
        The inverse variance array for the normalized fluxes.

    :param initial_thetas:
        A list of initial theta values to start from, and their source. For
        example: `[(theta_0, "guess"), (theta_1, "old_theta")]

    :param design_matrix:
        The model design matrix.

    :param regularization:
        The regularization strength to apply during optimization (Lambda).

    :param censoring_mask:
        A per-label censoring mask for each pixel.

    :keyword op_method:
        The optimization method to use. Valid options are: `l_bfgs_b`, `powell`.

    :keyword op_kwds:
        A dictionary of arguments that will be provided to the optimizer.

    :returns:
        The optimized theta coefficients, the noise residual `s2`, and
        metadata related to the optimization process.
    """

    if np.sum(ivar) < 1.0 * ivar.size: # MAGIC
        metadata = dict(message="No pixel information.", op_time=0.0)
        fiducial = np.hstack([1.0, np.zeros(design_matrix.shape[1] - 1)])
        return (fiducial, np.inf, metadata) # MAGIC

    # Determine if any theta coefficients will be censored.
    censored_theta = ~np.any(np.isfinite(design_matrix), axis=0)
    # Make the design matrix safe to use.
    design_matrix[:, censored_theta] = 0

    feval = []
    for initial_theta, initial_theta_source in initial_thetas:
        feval.append(_pixel_objective_function_fixed_scatter(
            initial_theta, design_matrix, flux, ivar, regularization, False))

    initial_theta, initial_theta_source = initial_thetas[np.nanargmin(feval)]

    base_op_kwds = dict(x0=initial_theta,
        args=(design_matrix, flux, ivar, regularization),
        disp=False, maxfun=np.inf, maxiter=np.inf)

    if any(censored_theta):
        # If the initial_theta is the same size as the censored_mask, but different
        # to the design_matrix, then we need to censor the initial theta so that we
        # don't bother solving for those parameters.
        base_op_kwds["x0"] = np.array(base_op_kwds["x0"])[~censored_theta]
        base_op_kwds["args"] = (design_matrix[:, ~censored_theta], flux, ivar,
            regularization)

    # Allow either l_bfgs_b or powell
    t_init = time()
    default_op_method = "l_bfgs_b"
    op_method = kwargs.get("op_method", default_op_method) or default_op_method
    op_method = op_method.lower()

    while True:
        if op_method == "l_bfgs_b":
            op_kwds = base_op_kwds.copy()
            op_kwds.update(m=design_matrix.shape[1], factr=10.0, pgtol=1e-6)
            op_kwds.update((kwargs.get("op_kwds", {}) or {}))

            # If op_bounds are given and we are censoring some theta terms, then we
            # will need to adjust which op_bounds we provide.
            if "bounds" in op_kwds and any(censored_theta):
                op_kwds["bounds"] = [b for b, is_censored in \
                    zip(op_kwds["bounds"], censored_theta) if not is_censored]

            # Only include allowable keywords.
            _remove_forbidden_op_kwds(op_method, op_kwds)

            op_params, fopt, metadata = op.fmin_l_bfgs_b(
                _pixel_objective_function_fixed_scatter,
                fprime=None, approx_grad=None, **op_kwds)

            metadata.update(dict(fopt=fopt))

            warnflag = metadata.get("warnflag", -1)
            if warnflag > 0:
                reason = "too many function evaluations or too many iterations" \
                         if warnflag == 1 else metadata["task"]
                logger.warn("Optimization warning (l_bfgs_b): {}".format(reason))

                # Do optimization again.
                op_method = "powell" 
                base_op_kwds.update(x0=op_params)

            else:
                break

        elif op_method == "powell":
            op_kwds = base_op_kwds.copy()
            op_kwds.update(xtol=1e-6, ftol=1e-6)
            op_kwds.update((kwargs.get("op_kwds", {}) or {}))

            # Only include allowable keywords.
            _remove_forbidden_op_kwds(op_method, op_kwds)

            # Set 'False' in args so that we don't return the gradient, 
            # because fmin doesn't want it.
            args = list(op_kwds["args"])
            args.append(False)
            op_kwds["args"] = tuple(args)

            t_init = time()

            op_params, fopt, direc, n_iter, n_funcs, warnflag = op.fmin_powell(
                _pixel_objective_function_fixed_scatter, 
                full_output=True, **op_kwds)

            metadata = dict(fopt=fopt, direc=direc, n_iter=n_iter, 
                n_funcs=n_funcs, warnflag=warnflag)
            break

        else:
            raise ValueError("unknown optimization method '{}' -- "
                             "powell or l_bfgs_b are available".format(op_method))

    # Additional metadata common to both optimizers.
    metadata.update(dict(op_method=op_method, op_time=time() - t_init,
        initial_theta=initial_theta, initial_theta_source=initial_theta_source))

    # De-censor the optimized parameters.
    if any(censored_theta):
        theta = np.zeros(censored_theta.size)
        theta[~censored_theta] = op_params

    else:
        theta = op_params

    # Fit the scatter.
    residuals_squared = (flux - np.dot(theta, design_matrix.T))**2
    scatter = op.fmin(_scatter_objective_function, 0.0,
        args=(residuals_squared, ivar), disp=False)

    return (theta, scatter**2, metadata)


1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3
4			"""
5			Fitting functions for use in The Cannon.
6			"""
7
8			from __future__ import (division, print_function, absolute_import,
9			unicode_literals)
10
11			__all__ = ["fit_spectrum", "fit_pixel_fixed_scatter", "fit_theta_by_linalg",
12			"chi_sq", "L1Norm_variation"]
13
14			import logging
15			import numpy as np
16			import scipy.optimize as op
17			from time import time
18
19			logger = logging.getLogger(__name__)
20
21
22			def fit_spectrum(flux, ivar, initial_labels, vectorizer, theta, s2, fiducials,
23			scales, dispersion=None, **kwargs):
24			"""
25			Fit a single spectrum by least-squared fitting.
26
27			:param flux:
28			The normalized flux values.
29
30			:param ivar:
31			The inverse variance array for the normalized fluxes.
32
33			:param initial_labels:
34			The point(s) to initialize optimization from.
35
36			:param vectorizer:
37			The vectorizer to use when fitting the data.
38
39			:param theta:
40			The theta coefficients (spectral derivatives) of the trained model.
41
42			:param s2:
43			The pixel scatter (s^2) array for each pixel.
44
45			:param dispersion: [optional]
46			The dispersion (e.g., wavelength) points for the normalized fluxes.
47
48			:returns:
49			A three-length tuple containing: the optimized labels, the covariance
50			matrix, and metadata associated with the optimization.
51			"""
52
53			adjusted_ivar = ivar/(1. + ivar * s2)
54
55			# Exclude non-finite points (e.g., points with zero inverse variance
56			# or non-finite flux values, but the latter shouldn't exist anyway).
57			use = np.isfinite(flux * adjusted_ivar) * (adjusted_ivar > 0)
58			L = len(vectorizer.label_names)
59
60			if not np.any(use):
61			logger.warn("No information in spectrum!")
62			return (np.nan * np.ones(L), None, {
63			"fail_message": "Pixels contained no information"})
64
65			# Splice the arrays we will use most.
66			flux = flux[use]
67			weights = np.sqrt(adjusted_ivar[use]) # --> 1.0 / sigma
68			use_theta = theta[use]
69
70			initial_labels = np.atleast_2d(initial_labels)
71
72			# Check the vectorizer whether it has a derivative built in.
73			Dfun = kwargs.pop("Dfun", True)
74			if Dfun not in (None, False):
75			try:
76			vectorizer.get_label_vector_derivative(initial_labels[0])
77
78			except NotImplementedError:
79			Dfun = None
80			logger.warn("No label vector derivatives available in {}!".format(
81			vectorizer))
82
83			except:
84			logger.exception("Exception raised when trying to calculate the "\
85			"label vector derivative at the fiducial values:")
86			raise
87
88			else:
89			# Use the label vector derivative.
90			Dfun = lambda parameters: weights * np.dot(use_theta,
91			vectorizer.get_label_vector_derivative(parameters)).T
92
93			else:
94			Dfun = None
95
96			def func(parameters):
97			return np.dot(use_theta, vectorizer(parameters))[:, 0]
98
99			def residuals(parameters):
100			return weights * (func(parameters) - flux)
101
102			kwds = {
103			"func": residuals,
104			"Dfun": Dfun,
105			"col_deriv": True,
106
107			# These get passed through to leastsq:
108			"ftol": 7./3 - 4./3 - 1, # Machine precision.
109			"xtol": 7./3 - 4./3 - 1, # Machine precision.
110			"gtol": 0.0,
111			"maxfev": 100000, # MAGIC
112			"epsfcn": None,
113			"factor": 1.0,
114			}
115
116			# Only update the keywords with things that op.curve_fit/op.leastsq expects.
117			for key in set(kwargs).intersection(kwds):
118			kwds[key] = kwargs[key]
119
120			results = []
121			for x0 in initial_labels:
122
123			try:
124			op_labels, cov, meta, mesg, ier = op.leastsq(
125			x0=(x0 - fiducials)/scales, full_output=True, **kwds)
126
127			except RuntimeError:
128			logger.exception("Exception in fitting from {}".format(x0))
129			continue
130
131			meta.update(
132			dict(x0=x0, chi_sq=np.sum(meta["fvec"]**2), ier=ier, mesg=mesg))
133			results.append((op_labels, cov, meta))
134
135			if len(results) == 0:
136			logger.warn("No results found!")
137			return (np.nan * np.ones(L), None, dict(fail_message="No results found"))
138
139			best_result_index = np.nanargmin([m["chi_sq"] for (o, c, m) in results])
140			op_labels, cov, meta = results[best_result_index]
141
142			# De-scale the optimized labels.
143			meta["model_flux"] = func(op_labels)
144			op_labels = op_labels * scales + fiducials
145
146			if np.allclose(op_labels, meta["x0"]):
147			logger.warn(
148			"Discarding optimized result because it is exactly the same as the "
149			"initial value!")
150
151			# We are in dire straits. We should not trust the result.
152			op_labels *= np.nan
153			meta["fail_message"] = "Optimized result same as initial value."
154
155			if cov is None:
156			cov = np.ones((len(op_labels), len(op_labels)))
157
158			if not np.any(np.isfinite(cov)):
159			logger.warn("Non-finite covariance matrix returned!")
160
161			# Save additional information.
162			meta.update({
163			"method": "leastsq",
164			"label_names": vectorizer.label_names,
165			"best_result_index": best_result_index,
166			"derivatives_used": Dfun is not None,
167			"snr": np.nanmedian(flux * weights),
168			"r_chi_sq": meta["chi_sq"]/(use.sum() - L - 1),
169			})
170			for key in ("ftol", "xtol", "gtol", "maxfev", "factor", "epsfcn"):
171			meta[key] = kwds[key]
172
173			return (op_labels, cov, meta)
174
175
176
177			def fit_theta_by_linalg(flux, ivar, s2, design_matrix):
178			"""
179			Fit theta coefficients to a set of normalized fluxes for a single pixel.
180
181			:param flux:
182			The normalized fluxes for a single pixel (across many stars).
183
184			:param ivar:
185			The inverse variance of the normalized flux values for a single pixel
186			across many stars.
187
188			:param s2:
189			The noise residual (squared scatter term) to adopt in the pixel.
190
191			:param design_matrix:
192			The model design matrix.
193
194			:returns:
195			The label vector coefficients for the pixel, and the inverse variance
196			matrix.
197			"""
198
199			adjusted_ivar = ivar/(1. + ivar * s2)
200			CiA = design_matrix * np.tile(adjusted_ivar, (design_matrix.shape[1], 1)).T
201			try:
202			ATCiAinv = np.linalg.inv(np.dot(design_matrix.T, CiA))
203			except np.linalg.linalg.LinAlgError:
204			N = design_matrix.shape[1]
205			return (np.hstack([1, np.zeros(N - 1)]), np.inf * np.eye(N))
206
207			ATY = np.dot(design_matrix.T, flux * adjusted_ivar)
208			theta = np.dot(ATCiAinv, ATY)
209
210			return (theta, ATCiAinv)
211
212
213
214			# TODO: This logic should probably go somewhere else.
215
216
217			def chi_sq(theta, design_matrix, flux, ivar, axis=None, gradient=True):
218			"""
219			Calculate the chi-squared difference between the spectral model and flux.
220
221			:param theta:
222			The theta coefficients.
223
224			:param design_matrix:
225			The model design matrix.
226
227			:param flux:
228			The normalized flux values.
229
230			:param ivar:
231			The inverse variances of the normalized flux values.
232
233			:param axis: [optional]
234			The axis to sum the chi-squared values across.
235
236			:param gradient: [optional]
237			Return the chi-squared value and its derivatives (Jacobian).
238
239			:returns:
240			The chi-squared difference between the spectral model and flux, and
241			optionally, the Jacobian.
242			"""
243			residuals = np.dot(theta, design_matrix.T) - flux
244
245			ivar_residuals = ivar * residuals
246			f = np.sum(ivar_residuals * residuals, axis=axis)
247			if not gradient:
248			return f
249
250			g = 2.0 * np.dot(design_matrix.T, ivar_residuals)
251			return (f, g)
252
253
254			def L1Norm_variation(theta):
255			"""
256			Return the L1 norm of theta (except the first entry) and its derivative.
257
258			:param theta:
259			An array of finite values.
260
261			:returns:
262			A two-length tuple containing: the L1 norm of theta (except the first
263			entry), and the derivative of the L1 norm of theta.
264			"""
265
266			return (np.sum(np.abs(theta[1:])), np.hstack([0.0, np.sign(theta[1:])]))
267
268
269			def _pixel_objective_function_fixed_scatter(theta, design_matrix, flux, ivar,
270			regularization, gradient=True):
271			"""
272			The objective function for a single regularized pixel with fixed scatter.
273
274			:param theta:
275			The spectral coefficients.
276
277			:param normalized_flux:
278			The normalized flux values for a single pixel across many stars.
279
280			:param adjusted_ivar:
281			The adjusted inverse variance of the normalized flux values for a single
282			pixel across many stars. This adjusted inverse variance array should
283			already have the scatter included.
284
285			:param regularization:
286			The regularization term to scale the L1 norm of theta with.
287
288			:param design_matrix:
289			The design matrix for the model.
290
291			:param gradient: [optional]
292			Also return the analytic derivative of the objective function.
293			"""
294
295			if gradient:
296			csq, d_csq = chi_sq(theta, design_matrix, flux, ivar, gradient=True)
297			L1, d_L1 = L1Norm_variation(theta)
298
299			f = csq + regularization * L1
300			g = d_csq + regularization * d_L1
301
302			return (f, g)
303
304			else:
305			csq = chi_sq(theta, design_matrix, flux, ivar, gradient=False)
306			L1, d_L1 = L1Norm_variation(theta)
307
308			return csq + regularization * L1
309
310
311			def _scatter_objective_function(scatter, residuals_squared, ivar):
312			adjusted_ivar = ivar/(1.0 + ivar * scatter**2)
313			chi_sq = residuals_squared * adjusted_ivar
314			return (np.mean(chi_sq) - 1.0)**2
315
316
317			def _remove_forbidden_op_kwds(op_method, op_kwds):
318			"""
319			Remove forbidden optimization keywords.
320
321			:param op_method:
322			The optimization algorithm to use.
323
324			:param op_kwds:
325			Optimization keywords.
326
327			:returns:
328			`None`. The dictionary of `op_kwds` will be updated.
329			"""
330			all_allowed_keys = dict(
331			l_bfgs_b=("bounds", "m", "factr", "pgtol", "epsilon", "iprint",
332			"maxfun", "maxiter", "disp", "callback", "maxls"),
333			powell=("xtol", "ftol", "maxiter", "maxfun", "full_output", "disp",
334			"retall", "callback", "initial_simplex"))
335
336			forbidden_keys = set(op_kwds).difference(all_allowed_keys[op_method])
337			if forbidden_keys:
338			logger.warn("Ignoring forbidden optimization keywords for {}: {}"\
339			.format(op_method, ", ".join(forbidden_keys)))
340			for key in forbidden_keys:
341			del op_kwds[key]
342
343			return None
344
345
346
347			def fit_pixel_fixed_scatter(flux, ivar, initial_thetas, design_matrix,
348			regularization, censoring_mask, **kwargs):
349			"""
350			Fit theta coefficients and noise residual for a single pixel, using
351			an initially fixed scatter value.
352
353			:param flux:
354			The normalized flux values.
355
356			:param ivar:
357			The inverse variance array for the normalized fluxes.
358
359			:param initial_thetas:
360			A list of initial theta values to start from, and their source. For
361			example: `[(theta_0, "guess"), (theta_1, "old_theta")]
362
363			:param design_matrix:
364			The model design matrix.
365
366			:param regularization:
367			The regularization strength to apply during optimization (Lambda).
368
369			:param censoring_mask:
370			A per-label censoring mask for each pixel.
371
372			:keyword op_method:
373			The optimization method to use. Valid options are: `l_bfgs_b`, `powell`.
374
375			:keyword op_kwds:
376			A dictionary of arguments that will be provided to the optimizer.
377
378			:returns:
379			The optimized theta coefficients, the noise residual `s2`, and
380			metadata related to the optimization process.
381			"""
382
383			if np.sum(ivar) < 1.0 * ivar.size: # MAGIC
384			metadata = dict(message="No pixel information.", op_time=0.0)
385			fiducial = np.hstack([1.0, np.zeros(design_matrix.shape[1] - 1)])
386			return (fiducial, np.inf, metadata) # MAGIC
387
388			# Determine if any theta coefficients will be censored.
389			censored_theta = ~np.any(np.isfinite(design_matrix), axis=0)
390			# Make the design matrix safe to use.
391			design_matrix[:, censored_theta] = 0
392
393			feval = []
394			for initial_theta, initial_theta_source in initial_thetas:
395			feval.append(_pixel_objective_function_fixed_scatter(
396			initial_theta, design_matrix, flux, ivar, regularization, False))
397
398			initial_theta, initial_theta_source = initial_thetas[np.nanargmin(feval)]
399
400			base_op_kwds = dict(x0=initial_theta,
401			args=(design_matrix, flux, ivar, regularization),
402			disp=False, maxfun=np.inf, maxiter=np.inf)
403
404			if any(censored_theta):
405			# If the initial_theta is the same size as the censored_mask, but different
406			# to the design_matrix, then we need to censor the initial theta so that we
407			# don't bother solving for those parameters.
408			base_op_kwds["x0"] = np.array(base_op_kwds["x0"])[~censored_theta]
409			base_op_kwds["args"] = (design_matrix[:, ~censored_theta], flux, ivar,
410			regularization)
411
412			# Allow either l_bfgs_b or powell
413			t_init = time()
414			default_op_method = "l_bfgs_b"
415			op_method = kwargs.get("op_method", default_op_method) or default_op_method
416			op_method = op_method.lower()
417
418			while True:
419			if op_method == "l_bfgs_b":
420			op_kwds = base_op_kwds.copy()
421			op_kwds.update(m=design_matrix.shape[1], factr=10.0, pgtol=1e-6)
422			op_kwds.update((kwargs.get("op_kwds", {}) or {}))
423
424			# If op_bounds are given and we are censoring some theta terms, then we
425			# will need to adjust which op_bounds we provide.
426			if "bounds" in op_kwds and any(censored_theta):
427			op_kwds["bounds"] = [b for b, is_censored in \
428			zip(op_kwds["bounds"], censored_theta) if not is_censored]
429
430			# Only include allowable keywords.
431			_remove_forbidden_op_kwds(op_method, op_kwds)
432
433			op_params, fopt, metadata = op.fmin_l_bfgs_b(
434			_pixel_objective_function_fixed_scatter,
435			fprime=None, approx_grad=None, **op_kwds)
436
437			metadata.update(dict(fopt=fopt))
438
439			warnflag = metadata.get("warnflag", -1)
440			if warnflag > 0:
441			reason = "too many function evaluations or too many iterations" \
442			if warnflag == 1 else metadata["task"]
443			logger.warn("Optimization warning (l_bfgs_b): {}".format(reason))
444
445			# Do optimization again.
446			op_method = "powell"
447			base_op_kwds.update(x0=op_params)
448
449			else:
450			break
451
452			elif op_method == "powell":
453			op_kwds = base_op_kwds.copy()
454			op_kwds.update(xtol=1e-6, ftol=1e-6)
455			op_kwds.update((kwargs.get("op_kwds", {}) or {}))
456
457			# Only include allowable keywords.
458			_remove_forbidden_op_kwds(op_method, op_kwds)
459
460			# Set 'False' in args so that we don't return the gradient,
461			# because fmin doesn't want it.
462			args = list(op_kwds["args"])
463			args.append(False)
464			op_kwds["args"] = tuple(args)
465
466			t_init = time()
467
468			op_params, fopt, direc, n_iter, n_funcs, warnflag = op.fmin_powell(
469			_pixel_objective_function_fixed_scatter,
470			full_output=True, **op_kwds)
471
472			metadata = dict(fopt=fopt, direc=direc, n_iter=n_iter,
473			n_funcs=n_funcs, warnflag=warnflag)
474			break
475
476			else:
477			raise ValueError("unknown optimization method '{}' -- "
478			"powell or l_bfgs_b are available".format(op_method))
479
480			# Additional metadata common to both optimizers.
481			metadata.update(dict(op_method=op_method, op_time=time() - t_init,
482			initial_theta=initial_theta, initial_theta_source=initial_theta_source))
483
484			# De-censor the optimized parameters.
485			if any(censored_theta):
486			theta = np.zeros(censored_theta.size)
487			theta[~censored_theta] = op_params
488
489			else:
490			theta = op_params
491
492			# Fit the scatter.
493			residuals_squared = (flux - np.dot(theta, design_matrix.T))**2
494			scatter = op.fmin(_scatter_objective_function, 0.0,
495			args=(residuals_squared, ivar), disp=False)
496
497			return (theta, scatter**2, metadata)
498

andycasey / AnniesLasso

Push — master ( 2074c3...e65310 )

_remove_forbidden_op_kwds() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like