Completed
Push — master ( cb4176...bf1eb1 )
by Andy
24s
created

normalize()   A

Complexity

Conditions 1

Size

Total Lines 54

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 1
c 1
b 0
f 1
dl 0
loc 54
rs 9.6716

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""
5
Continuum-normalization.
6
"""
7
8
from __future__ import (division, print_function, absolute_import,
9
                        unicode_literals)
10
11
__all__ = ["normalize", "sines_and_cosines"]
12
13
import logging
14
import numpy as np
15
16
17
def _continuum_design_matrix(dispersion, L, order):
18
    """
19
    Build a design matrix for the continuum determination, using sines and
20
    cosines.
21
22
    :param dispersion:
23
        An array of dispersion points.
24
25
    :param L:
26
        The length-scale for the sine and cosine functions.
27
28
    :param order:
29
        The number of sines and cosines to use in the fit.
30
    """
31
32
    L, dispersion = float(L), np.array(dispersion)
33
    scale = 2 * (np.pi / L)
34
    return np.vstack([
35
        np.ones_like(dispersion).reshape((1, -1)), 
36
        np.array([
37
            [np.cos(o * scale * dispersion), np.sin(o * scale * dispersion)] \
38
            for o in range(1, order + 1)]).reshape((2 * order, dispersion.size))
39
        ])
40
41
42
def sines_and_cosines(dispersion, flux, ivar, continuum_pixels, L=1400, order=3, 
43
    regions=None, fill_value=1.0, **kwargs):
44
    """
45
    Fit the flux values of pre-defined continuum pixels using a sum of sine and
46
    cosine functions.
47
48
    :param dispersion:
49
        The dispersion values.
50
51
    :param flux:
52
        The flux values for all pixels, as they correspond to the `dispersion`
53
        array.
54
55
    :param ivar:
56
        The inverse variances for all pixels, as they correspond to the
57
        `dispersion` array.
58
59
    :param continuum_pixels:
60
        A mask that selects pixels that should be considered as 'continuum'.
61
62
    :param L: [optional]
63
        The length scale for the sines and cosines.
64
65
    :param order: [optional]
66
        The number of sine/cosine functions to use in the fit.
67
68
    :param regions: [optional]
69
        Specify sections of the spectra that should be fitted separately in each
70
        star. This may be due to gaps between CCDs, or some other physically-
71
        motivated reason. These values should be specified in the same units as
72
        the `dispersion`, and should be given as a list of `[(start, end), ...]`
73
        values. For example, APOGEE spectra have gaps near the following
74
        wavelengths which could be used as `regions`:
75
76
        >> regions = ([15090, 15822], [15823, 16451], [16452, 16971])
77
78
    :param fill_value: [optional]
79
        The continuum value to use for when no continuum was calculated for that
80
        particular pixel (e.g., the pixel is outside of the `regions`).
81
82
    :param full_output: [optional]
83
        If set as True, then a metadata dictionary will also be returned.
84
85
    :returns:
86
        The continuum values for all pixels, and a dictionary that contains 
87
        metadata about the fit.
88
    """
89
90
    scalar = kwargs.pop("__magic_scalar", 1e-6) # MAGIC
91
    flux, ivar = np.atleast_2d(flux), np.atleast_2d(ivar)
92
93
    if regions is None:
94
        regions = [(dispersion[0], dispersion[-1])]
95
96
    region_masks = []
97
    region_matrices = []
98
    continuum_masks = []
99
    continuum_matrices = []
100
    pixel_included_in_regions = np.zeros_like(flux).astype(int)
101
    for start, end in regions:
102
103
        # Build the masks for this region.
104
        si, ei = np.searchsorted(dispersion, (start, end))
105
        region_mask = (end >= dispersion) * (dispersion >= start)
106
        region_masks.append(region_mask)
107
        pixel_included_in_regions[:, region_mask] += 1
108
109
        continuum_masks.append(continuum_pixels[
110
            (ei >= continuum_pixels) * (continuum_pixels >= si)])
111
112
        # Build the design matrices for this region.
113
        region_matrices.append(
114
            _continuum_design_matrix(dispersion[region_masks[-1]], L, order))
115
        continuum_matrices.append(
116
            _continuum_design_matrix(dispersion[continuum_masks[-1]], L, order))
117
118
        # TODO: ISSUE: Check for overlapping regions and raise an warning.
119
120
    # Check for non-zero pixels (e.g. ivar > 0) that are not included in a
121
    # region. We should warn about this very loudly!
122
    warn_on_pixels = (pixel_included_in_regions == 0) * (ivar > 0)
123
124
    metadata = []
125
    continuum = np.ones_like(flux) * fill_value
126
    for i in range(flux.shape[0]):
127
128
        warn_indices = np.where(warn_on_pixels[i])[0]
129
        if any(warn_indices):
130
            # Split by deltas so that we give useful warning messages.
131
            segment_indices = np.where(np.diff(warn_indices) > 1)[0]
132
            segment_indices = np.sort(np.hstack(
133
                [0, segment_indices, segment_indices + 1, len(warn_indices)]))
134
            segment_indices = segment_indices.reshape(-1, 2)
135
136
            segments = ", ".join(["{:.1f} to {:.1f} ({:d} pixels)".format(
137
                dispersion[s], dispersion[e], e-s) for s, e in segment_indices])
138
139
            logging.warn("Some pixels in spectrum index {0} have measured flux "
140
                         "values (e.g., ivar > 0) but are not included in any "
141
                         "specified continuum region. These pixels won't be "
142
                         "continuum-normalised: {1}".format(i, segments))
143
144
            
145
        # Get the flux and inverse variance for this object.
146
        object_metadata = []
147
        object_flux, object_ivar = (flux[i], ivar[i])
148
149
        # Normalize each region.
150
        for region_mask, region_matrix, continuum_mask, continuum_matrix in \
151
        zip(region_masks, region_matrices, continuum_masks, continuum_matrices):
152
            if continuum_mask.size == 0:
153
                # Skipping..
154
                object_metadata.append([order, L, fill_value, scalar, [], None])
155
                continue
156
157
            # We will fit to continuum pixels only.   
158
            continuum_disp = dispersion[continuum_mask] 
159
            continuum_flux, continuum_ivar \
160
                = (object_flux[continuum_mask], object_ivar[continuum_mask])
161
162
            # Solve for the amplitudes.
163
            M = continuum_matrix
164
            MTM = np.dot(M, continuum_ivar[:, None] * M.T)
165
            MTy = np.dot(M, (continuum_ivar * continuum_flux).T)
166
167
            eigenvalues = np.linalg.eigvalsh(MTM)
168
            MTM[np.diag_indices(len(MTM))] += scalar * np.max(eigenvalues)
169
            eigenvalues = np.linalg.eigvalsh(MTM)
170
            condition_number = max(eigenvalues)/min(eigenvalues)
171
172
            amplitudes = np.linalg.solve(MTM, MTy)
173
            continuum[i, region_mask] = np.dot(region_matrix.T, amplitudes)
174
            object_metadata.append(
175
                (order, L, fill_value, scalar, amplitudes, condition_number))
176
177
        metadata.append(object_metadata)
178
179
    return (continuum, metadata) 
180
    
181
182
def normalize(dispersion, flux, ivar, continuum_pixels, L=1400, order=3, 
183
    regions=None, fill_value=1.0, **kwargs):
184
    """
185
    Pseudo-continuum-normalize the flux using a defined set of continuum pixels
186
    and a sum of sine and cosine functions.
187
188
    :param dispersion:
189
        The dispersion values.
190
191
    :param flux:
192
        The flux values for all pixels, as they correspond to the `dispersion`
193
        array.
194
195
    :param ivar:
196
        The inverse variances for all pixels, as they correspond to the
197
        `dispersion` array.
198
199
    :param continuum_pixels:
200
        A mask that selects pixels that should be considered as 'continuum'.
201
202
    :param L: [optional]
203
        The length scale for the sines and cosines.
204
205
    :param order: [optional]
206
        The number of sine/cosine functions to use in the fit.
207
208
    :param regions: [optional]
209
        Specify sections of the spectra that should be fitted separately in each
210
        star. This may be due to gaps between CCDs, or some other physically-
211
        motivated reason. These values should be specified in the same units as
212
        the `dispersion`, and should be given as a list of `[(start, end), ...]`
213
        values. For example, APOGEE spectra have gaps near the following
214
        wavelengths which could be used as `regions`:
215
216
        >> regions = ([15090, 15822], [15823, 16451], [16452, 16971])
217
218
    :param fill_value: [optional]
219
        The continuum value to use for when no continuum was calculated for that
220
        particular pixel (e.g., the pixel is outside of the `regions`).
221
222
    :param full_output: [optional]
223
        If set as True, then a metadata dictionary will also be returned.
224
225
    :returns:
226
        The continuum values for all pixels, and a dictionary that contains 
227
        metadata about the fit.
228
    """
229
    continuum, metadata = sines_and_cosines(dispersion, flux, ivar, **kwargs)
230
231
    normalized_flux = flux/continuum
232
    normalized_ivar = continuum * ivar * continuum
233
    normalized_flux[normalized_ivar == 0] = 1.0
234
    
235
    return (normalized_flux, normalized_ivar, continuum, metadata)
236
237
238