Completed
Pull Request — master (#1)
by Andy
01:12
created

AnniesLasso.CannonModel.train()   B

Complexity

Conditions 6

Size

Total Lines 46

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 46
rs 7.5748
cc 6
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""
5
A pedestrian version of The Cannon.
6
"""
7
8
from __future__ import (division, print_function, absolute_import,
9
                        unicode_literals)
10
11
__all__ = ["CannonModel"]
12
13
import logging
14
import numpy as np
15
import scipy.optimize as op
16
17
from . import (model, utils)
18
19
logger = logging.getLogger(__name__)
20
21
22
class CannonModel(model.BaseCannonModel):
23
    """
24
    A generalised Cannon model for the estimation of arbitrary stellar labels.
25
26
    :param labels:
27
        A table with columns as labels, and stars as rows.
28
29
    :type labels:
30
        :class:`~astropy.table.Table` or numpy structured array
31
32
    :param fluxes:
33
        An array of fluxes for stars in the training set, given as shape
34
        `(num_stars, num_pixels)`. The `num_stars` should match the number of
35
        rows in `labels`.
36
37
    :type fluxes:
38
        :class:`np.ndarray`
39
40
    :param flux_uncertainties:
41
        An array of 1-sigma flux uncertainties for stars in the training set,
42
        The shape of the `flux_uncertainties` should match `fluxes`. 
43
44
    :type flux_uncertainties:
45
        :class:`np.ndarray`
46
47
    :param dispersion: [optional]
48
        The dispersion values corresponding to the given pixels. If provided, 
49
        this should have length `num_pixels`.
50
51
    :param live_dangerously: [optional]
52
        If enabled then no checks will be made on the label names, prohibiting
53
        the user to input human-readable forms of the label vector.
54
    """
55
56
    _descriptive_attributes = ["_label_vector"]
57
    _trained_attributes = ["_coefficients", "_scatter"]
58
    _data_attributes = \
59
        ["training_labels", "training_fluxes", "training_flux_uncertainties"]
60
    
61
    def __init__(self, *args, **kwargs):
62
        super(CannonModel, self).__init__(*args, **kwargs)
63
64
65
    @model.requires_label_vector
66
    def train(self, **kwargs):
67
        """
68
        Train the model based on the training set and the description of the
69
        label vector.
70
        """
71
        
72
        # Initialise the scatter and coefficient arrays.
73
        N_px = len(self.dispersion)
74
        scatter = np.nan * np.ones(N_px)
75
        label_vector_array = self.label_vector_array
76
        theta = np.nan * np.ones((N_px, label_vector_array.shape[0]))
77
78
        # Details for the progressbar.
79
        pb_kwds = {
80
            "message": "Training Cannon model from {0} stars with {1} pixels "
81
                       "each".format(len(self.training_labels), N_px),
82
            "size": 100 if kwargs.pop("progressbar", True) else -1
83
        }
84
        
85
        if self.pool is None:
86
            for pixel in utils.progressbar(range(N_px), **pb_kwds):
87
                theta[pixel, :], scatter[pixel] = _fit_pixel(
88
                    self.training_fluxes[:, pixel], 
89
                    self.training_flux_uncertainties[:, pixel],
90
                    label_vector_array, **kwargs)
91
92
        else:
93
            # Not as nice as just mapping, but necessary for a progress bar.
94
            process = { pixel: self.pool.apply_async(
95
                    _fit_pixel,
96
                    args=(
97
                        self.training_fluxes[:, pixel], 
98
                        self.training_flux_uncertainties[:, pixel],
99
                        label_vector_array
100
                    ),
101
                    kwds=kwargs) \
102
                for pixel in range(N_px) }
103
104
            for pixel, proc in utils.progressbar(process.items(), **pb_kwds):
105
                theta[pixel, :], scatter[pixel] = proc.get()
106
107
        self.coefficients, self.scatter = (theta, scatter)
108
        self._trained = True
109
110
        return (theta, scatter)
111
112
113
    @model.requires_training_wheels
114
    def predict(self, labels=None, **labels_as_kwargs):
115
        """
116
        Predict spectra from the trained model, given the labels.
117
118
        :param labels: [optional]
119
            The labels required for the trained model. This should be a N-length
120
            list matching the number of unique terms in the model, in the order
121
            given by `self.labels`. Alternatively, labels can be explicitly 
122
            given as keyword arguments.
123
        """
124
125
        labels = self._format_input_labels(labels, **labels_as_kwargs)
126
127
        return np.dot(self.coefficients, 
128
            model._build_label_vector_rows(self.label_vector, labels)).flatten()
129
130
131
    @model.requires_training_wheels
132
    def fit(self, fluxes, flux_uncertainties, **kwargs):
133
        """
134
        Solve the labels for given pixel fluxes and uncertainties.
135
136
        :param fluxes:
137
            The normalised fluxes. These should be on the same dispersion scale
138
            as the trained data.
139
140
        :param flux_uncertainties:
141
            The 1-sigma uncertainties in the fluxes. This should have the same
142
            shape as `fluxes`.
143
144
        :returns:
145
            The labels and covariance matrix.
146
        """
147
148
        label_indices = self._get_lowest_order_label_indices()
149
        fluxes, flux_uncertainties = map(np.array, (fluxes, flux_uncertainties))
150
151
        # TODO: Consider parallelising this, which would mean factoring
152
        # _fit out of the model class, which gets messy.
153
        # Since solving for labels is not a big bottleneck (yet), let's leave
154
        # this.
155
156
        full_output = kwargs.pop("full_output", False)
157
        if fluxes.ndim == 1:
158
            labels, covariance = \
159
                self._fit(fluxes, flux_uncertainties, label_indices, **kwargs)
160
        else:
161
            N_stars, N_labels = (fluxes.shape[0], len(self.labels))
162
            labels = np.empty((N_stars, N_labels), dtype=float)
163
            covariance = np.empty((N_stars, N_labels, N_labels), dtype=float)
164
165
            for i, (f, u) in enumerate(zip(fluxes, flux_uncertainties)):
166
                labels[i, :], covariance[i, :] = \
167
                    self._fit(f, u, label_indices, **kwargs)
168
169
        if full_output:
170
            return (labels, covariance)
171
        return labels
172
173
174
    def _fit(self, fluxes, flux_uncertainties, label_indices, **kwargs):
175
        """
176
        Solve the labels for given pixel fluxes and uncertainties
177
        for a single star.
178
179
        :param fluxes:
180
            The normalised fluxes. These should be on the same dispersion scale
181
            as the trained data.
182
183
        :param flux_uncertainties:
184
            The 1-sigma uncertainties in the fluxes. This should have the same
185
            shape as `fluxes`.
186
187
        :returns:
188
            The labels and covariance matrix.
189
        """
190
191
        # Check which pixels to use, then just use those.
192
        use = (flux_uncertainties < kwargs.get("max_uncertainty", 1)) \
193
            * np.isfinite(self.coefficients[:, 0] * fluxes * flux_uncertainties)
194
195
        fluxes = fluxes.copy()[use]
196
        flux_uncertainties = flux_uncertainties.copy()[use]
197
        scatter, coefficients = self.scatter[use], self.coefficients[use]
198
199
        Cinv = 1.0 / (scatter**2 + flux_uncertainties**2)
200
        A = np.dot(coefficients.T, Cinv[:, None] * coefficients)
201
        B = np.dot(coefficients.T, Cinv * fluxes)
202
        theta_p0 = np.linalg.solve(A, B)
203
204
        # Need to match the initial theta coefficients back to label values.
205
        # (Maybe this should use some general non-linear simultaneous solver?)
206
        initial = {}
207
        for index in label_indices:
208
            if index is None: continue
209
            label, order = self.label_vector[index][0]
210
            # The +1 index offset is because the first theta is a scaling.
211
            value = abs(theta_p0[1 + index])**(1./order)
212
            if not np.isfinite(value): continue
213
            initial[label] = value
214
215
        missing = set(self.labels).difference(initial)
216
        if missing:
217
            # There must be some coefficients that are only used in cross-terms.
218
            # We could solve for them, or just take the mean of the training
219
            # set as the initial guess.
220
            initial.update({ label: \
221
                np.nanmean(self.training_labels[label]) for label in missing
222
            })
223
            
224
        # Create and test the generating function.
225
        def function(coeffs, *labels):
226
            return np.dot(coeffs, 
227
                model._build_label_vector_rows(self.label_vector, 
228
                    { label: [v] for label, v in zip(self.labels, labels) }
229
                )).flatten()
230
231
        # Solve for the parameters.
232
        kwds = {
233
            "p0": np.array([initial[label] for label in self.labels]),
234
            "maxfev": 10000,
235
            "sigma": 1.0/np.sqrt(Cinv),
236
            "absolute_sigma": True
237
        }
238
        kwds.update(kwargs)
239
        labels_opt, cov = op.curve_fit(function, coefficients, fluxes, **kwds)
240
241
        return (labels_opt, cov)
242
243
244
245
def _fit_pixel(fluxes, flux_uncertainties, label_vector_array, **kwargs):
246
    """
247
    Return the optimal label vector coefficients and scatter for a pixel, given
248
    the fluxes, uncertainties, and the label vector array.
249
250
    :param fluxes:
251
        The fluxes for the given pixel, from all stars.
252
253
    :param flux_uncertainties:
254
        The 1-sigma flux uncertainties for the given pixel, from all stars.
255
256
    :param label_vector_array:
257
        The label vector array. This should have shape `(N_stars, N_terms + 1)`.
258
259
    :returns:
260
        The optimised label vector coefficients and scatter for this pixel.
261
    """
262
263
    _ = kwargs.get("max_uncertainty", 1)
264
    failed_response = (np.nan * np.ones(label_vector_array.shape[0]), _)
265
    if np.all(flux_uncertainties > _):
266
        return failed_response
267
268
    # Get an initial guess of the scatter.
269
    scatter = np.var(fluxes) - np.median(flux_uncertainties)**2
270
    scatter = np.sqrt(scatter) if scatter >= 0 else np.std(fluxes)
271
    
272
    # Optimise the scatter, and at each scatter value we will calculate the
273
    # optimal vector coefficients.
274
    op_scatter, fopt, direc, n_iter, n_funcs, warnflag = op.fmin_powell(
275
        _pixel_scatter_nll, scatter,
276
        args=(fluxes, flux_uncertainties, label_vector_array),
277
        disp=False, full_output=True)
278
279
    if warnflag > 0:
280
        logger.warning("Warning: {}".format([
281
            "Maximum number of function evaluations made during optimisation.",
282
            "Maximum number of iterations made during optimisation."
283
            ][warnflag - 1]))
284
285
    # Calculate the coefficients at the optimal scatter value.
286
    # Note that if we can't solve for the coefficients, we should just set them
287
    # as zero and send back a giant variance.
288
    try:
289
        coefficients, ATCiAinv, variance = _fit_coefficients(
290
            fluxes, flux_uncertainties, op_scatter, label_vector_array)
291
292
    except np.linalg.linalg.LinAlgError:
293
        logger.exception("Failed to calculate coefficients")
294
        if kwargs.get("debug", False): raise
295
296
        return failed_response
297
298
    else:
299
        return (coefficients, op_scatter)
300
301
302
def _pixel_scatter_nll(scatter, fluxes, flux_uncertainties, label_vector_array,
303
    **kwargs):
304
    """
305
    Return the negative log-likelihood for the scatter in a single pixel.
306
307
    :param scatter:
308
        The model scatter in the pixel.
309
310
    :param fluxes:
311
        The fluxes for a given pixel (in many stars).
312
313
    :param flux_uncertainties:
314
        The 1-sigma uncertainties in the fluxes for a given pixel. This should
315
        have the same shape as `fluxes`.
316
317
    :param label_vector_array:
318
        The label vector array for each star, for the given pixel.
319
320
    :returns:
321
        The log-likelihood of the log scatter, given the fluxes and the label
322
        vector array.
323
324
    :raises np.linalg.linalg.LinAlgError:
325
        If there was an error in inverting a matrix, and `debug` is set to True.
326
    """
327
328
    if 0 > scatter:
329
        return np.inf
330
331
    try:
332
        # Calculate the coefficients for the given level of scatter.
333
        theta, ATCiAinv, variance = _fit_coefficients(
334
            fluxes, flux_uncertainties, scatter, label_vector_array)
335
336
    except np.linalg.linalg.LinAlgError:
337
        if kwargs.get("debug", False): raise
338
        return np.inf
339
340
    model = np.dot(theta, label_vector_array)
341
342
    return 0.5 * np.sum((fluxes - model)**2 / variance) \
343
        +  0.5 * np.sum(np.log(variance))
344
345
346
def _fit_coefficients(fluxes, flux_uncertainties, scatter, label_vector_array):
347
    """
348
    Fit model coefficients and scatter to a given set of normalised fluxes for a
349
    single pixel.
350
351
    :param fluxes:
352
        The normalised fluxes for a single pixel (in many stars).
353
354
    :param flux_uncertainties:
355
        The 1-sigma uncertainties in normalised fluxes. This should have the
356
        same shape as `fluxes`.
357
358
    :param label_vector_array:
359
        The label vector array for each pixel.
360
361
    :returns:
362
        The label vector coefficients for the pixel, the inverse variance matrix
363
        and the total pixel variance.
364
    """
365
366
    variance = flux_uncertainties**2 + scatter**2
367
    CiA = label_vector_array.T * \
368
        np.tile(1./variance, (label_vector_array.shape[0], 1)).T
369
    ATCiAinv = np.linalg.inv(np.dot(label_vector_array, CiA))
370
371
    ATY = np.dot(label_vector_array, fluxes/variance)
372
    theta = np.dot(ATCiAinv, ATY)
373
374
    return (theta, ATCiAinv, variance)
375