normalize() - Code Metrics - Inspection of "add normalize func" - andycasey/AnniesLasso - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( cb4176...bf1eb1 )

by Andy

created 2018-01-29 01:48 UTC

normalize() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
cc	1
c	1
b	0
f	1
dl	0
loc	54
rs	9.6716

How to fix Long Method

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Continuum-normalization.
"""

from __future__ import (division, print_function, absolute_import,
                        unicode_literals)

__all__ = ["normalize", "sines_and_cosines"]

import logging
import numpy as np


def _continuum_design_matrix(dispersion, L, order):
    """
    Build a design matrix for the continuum determination, using sines and
    cosines.

    :param dispersion:
        An array of dispersion points.

    :param L:
        The length-scale for the sine and cosine functions.

    :param order:
        The number of sines and cosines to use in the fit.
    """

    L, dispersion = float(L), np.array(dispersion)
    scale = 2 * (np.pi / L)
    return np.vstack([
        np.ones_like(dispersion).reshape((1, -1)), 
        np.array([
            [np.cos(o * scale * dispersion), np.sin(o * scale * dispersion)] \
            for o in range(1, order + 1)]).reshape((2 * order, dispersion.size))
        ])


def sines_and_cosines(dispersion, flux, ivar, continuum_pixels, L=1400, order=3, 
    regions=None, fill_value=1.0, **kwargs):
    """
    Fit the flux values of pre-defined continuum pixels using a sum of sine and
    cosine functions.

    :param dispersion:
        The dispersion values.

    :param flux:
        The flux values for all pixels, as they correspond to the `dispersion`
        array.

    :param ivar:
        The inverse variances for all pixels, as they correspond to the
        `dispersion` array.

    :param continuum_pixels:
        A mask that selects pixels that should be considered as 'continuum'.

    :param L: [optional]
        The length scale for the sines and cosines.

    :param order: [optional]
        The number of sine/cosine functions to use in the fit.

    :param regions: [optional]
        Specify sections of the spectra that should be fitted separately in each
        star. This may be due to gaps between CCDs, or some other physically-
        motivated reason. These values should be specified in the same units as
        the `dispersion`, and should be given as a list of `[(start, end), ...]`
        values. For example, APOGEE spectra have gaps near the following
        wavelengths which could be used as `regions`:

        >> regions = ([15090, 15822], [15823, 16451], [16452, 16971])

    :param fill_value: [optional]
        The continuum value to use for when no continuum was calculated for that
        particular pixel (e.g., the pixel is outside of the `regions`).

    :param full_output: [optional]
        If set as True, then a metadata dictionary will also be returned.

    :returns:
        The continuum values for all pixels, and a dictionary that contains 
        metadata about the fit.
    """

    scalar = kwargs.pop("__magic_scalar", 1e-6) # MAGIC
    flux, ivar = np.atleast_2d(flux), np.atleast_2d(ivar)

    if regions is None:
        regions = [(dispersion[0], dispersion[-1])]

    region_masks = []
    region_matrices = []
    continuum_masks = []
    continuum_matrices = []
    pixel_included_in_regions = np.zeros_like(flux).astype(int)
    for start, end in regions:

        # Build the masks for this region.
        si, ei = np.searchsorted(dispersion, (start, end))
        region_mask = (end >= dispersion) * (dispersion >= start)
        region_masks.append(region_mask)
        pixel_included_in_regions[:, region_mask] += 1

        continuum_masks.append(continuum_pixels[
            (ei >= continuum_pixels) * (continuum_pixels >= si)])

        # Build the design matrices for this region.
        region_matrices.append(
            _continuum_design_matrix(dispersion[region_masks[-1]], L, order))
        continuum_matrices.append(
            _continuum_design_matrix(dispersion[continuum_masks[-1]], L, order))

        # TODO: ISSUE: Check for overlapping regions and raise an warning.

    # Check for non-zero pixels (e.g. ivar > 0) that are not included in a
    # region. We should warn about this very loudly!
    warn_on_pixels = (pixel_included_in_regions == 0) * (ivar > 0)

    metadata = []
    continuum = np.ones_like(flux) * fill_value
    for i in range(flux.shape[0]):

        warn_indices = np.where(warn_on_pixels[i])[0]
        if any(warn_indices):
            # Split by deltas so that we give useful warning messages.
            segment_indices = np.where(np.diff(warn_indices) > 1)[0]
            segment_indices = np.sort(np.hstack(
                [0, segment_indices, segment_indices + 1, len(warn_indices)]))
            segment_indices = segment_indices.reshape(-1, 2)

            segments = ", ".join(["{:.1f} to {:.1f} ({:d} pixels)".format(
                dispersion[s], dispersion[e], e-s) for s, e in segment_indices])

            logging.warn("Some pixels in spectrum index {0} have measured flux "
                         "values (e.g., ivar > 0) but are not included in any "
                         "specified continuum region. These pixels won't be "
                         "continuum-normalised: {1}".format(i, segments))

            
        # Get the flux and inverse variance for this object.
        object_metadata = []
        object_flux, object_ivar = (flux[i], ivar[i])

        # Normalize each region.
        for region_mask, region_matrix, continuum_mask, continuum_matrix in \
        zip(region_masks, region_matrices, continuum_masks, continuum_matrices):
            if continuum_mask.size == 0:
                # Skipping..
                object_metadata.append([order, L, fill_value, scalar, [], None])
                continue

            # We will fit to continuum pixels only.   
            continuum_disp = dispersion[continuum_mask] 
            continuum_flux, continuum_ivar \
                = (object_flux[continuum_mask], object_ivar[continuum_mask])

            # Solve for the amplitudes.
            M = continuum_matrix
            MTM = np.dot(M, continuum_ivar[:, None] * M.T)
            MTy = np.dot(M, (continuum_ivar * continuum_flux).T)

            eigenvalues = np.linalg.eigvalsh(MTM)
            MTM[np.diag_indices(len(MTM))] += scalar * np.max(eigenvalues)
            eigenvalues = np.linalg.eigvalsh(MTM)
            condition_number = max(eigenvalues)/min(eigenvalues)

            amplitudes = np.linalg.solve(MTM, MTy)
            continuum[i, region_mask] = np.dot(region_matrix.T, amplitudes)
            object_metadata.append(
                (order, L, fill_value, scalar, amplitudes, condition_number))

        metadata.append(object_metadata)

    return (continuum, metadata) 
    

def normalize(dispersion, flux, ivar, continuum_pixels, L=1400, order=3, 
    regions=None, fill_value=1.0, **kwargs):
    """
    Pseudo-continuum-normalize the flux using a defined set of continuum pixels
    and a sum of sine and cosine functions.

    :param dispersion:
        The dispersion values.

    :param flux:
        The flux values for all pixels, as they correspond to the `dispersion`
        array.

    :param ivar:
        The inverse variances for all pixels, as they correspond to the
        `dispersion` array.

    :param continuum_pixels:
        A mask that selects pixels that should be considered as 'continuum'.

    :param L: [optional]
        The length scale for the sines and cosines.

    :param order: [optional]
        The number of sine/cosine functions to use in the fit.

    :param regions: [optional]
        Specify sections of the spectra that should be fitted separately in each
        star. This may be due to gaps between CCDs, or some other physically-
        motivated reason. These values should be specified in the same units as
        the `dispersion`, and should be given as a list of `[(start, end), ...]`
        values. For example, APOGEE spectra have gaps near the following
        wavelengths which could be used as `regions`:

        >> regions = ([15090, 15822], [15823, 16451], [16452, 16971])

    :param fill_value: [optional]
        The continuum value to use for when no continuum was calculated for that
        particular pixel (e.g., the pixel is outside of the `regions`).

    :param full_output: [optional]
        If set as True, then a metadata dictionary will also be returned.

    :returns:
        The continuum values for all pixels, and a dictionary that contains 
        metadata about the fit.
    """
    continuum, metadata = sines_and_cosines(dispersion, flux, ivar, **kwargs)

    normalized_flux = flux/continuum
    normalized_ivar = continuum * ivar * continuum
    normalized_flux[normalized_ivar == 0] = 1.0
    
    return (normalized_flux, normalized_ivar, continuum, metadata)




1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3
4			"""
5			Continuum-normalization.
6			"""
7
8			from __future__ import (division, print_function, absolute_import,
9			unicode_literals)
10
11			__all__ = ["normalize", "sines_and_cosines"]
12
13			import logging
14			import numpy as np
15
16
17			def _continuum_design_matrix(dispersion, L, order):
18			"""
19			Build a design matrix for the continuum determination, using sines and
20			cosines.
21
22			:param dispersion:
23			An array of dispersion points.
24
25			:param L:
26			The length-scale for the sine and cosine functions.
27
28			:param order:
29			The number of sines and cosines to use in the fit.
30			"""
31
32			L, dispersion = float(L), np.array(dispersion)
33			scale = 2 * (np.pi / L)
34			return np.vstack([
35			np.ones_like(dispersion).reshape((1, -1)),
36			np.array([
37			[np.cos(o * scale * dispersion), np.sin(o * scale * dispersion)] \
38			for o in range(1, order + 1)]).reshape((2 * order, dispersion.size))
39			])
40
41
42			def sines_and_cosines(dispersion, flux, ivar, continuum_pixels, L=1400, order=3,
43			regions=None, fill_value=1.0, **kwargs):
44			"""
45			Fit the flux values of pre-defined continuum pixels using a sum of sine and
46			cosine functions.
47
48			:param dispersion:
49			The dispersion values.
50
51			:param flux:
52			The flux values for all pixels, as they correspond to the `dispersion`
53			array.
54
55			:param ivar:
56			The inverse variances for all pixels, as they correspond to the
57			`dispersion` array.
58
59			:param continuum_pixels:
60			A mask that selects pixels that should be considered as 'continuum'.
61
62			:param L: [optional]
63			The length scale for the sines and cosines.
64
65			:param order: [optional]
66			The number of sine/cosine functions to use in the fit.
67
68			:param regions: [optional]
69			Specify sections of the spectra that should be fitted separately in each
70			star. This may be due to gaps between CCDs, or some other physically-
71			motivated reason. These values should be specified in the same units as
72			the `dispersion`, and should be given as a list of `[(start, end), ...]`
73			values. For example, APOGEE spectra have gaps near the following
74			wavelengths which could be used as `regions`:
75
76			>> regions = ([15090, 15822], [15823, 16451], [16452, 16971])
77
78			:param fill_value: [optional]
79			The continuum value to use for when no continuum was calculated for that
80			particular pixel (e.g., the pixel is outside of the `regions`).
81
82			:param full_output: [optional]
83			If set as True, then a metadata dictionary will also be returned.
84
85			:returns:
86			The continuum values for all pixels, and a dictionary that contains
87			metadata about the fit.
88			"""
89
90			scalar = kwargs.pop("__magic_scalar", 1e-6) # MAGIC
91			flux, ivar = np.atleast_2d(flux), np.atleast_2d(ivar)
92
93			if regions is None:
94			regions = [(dispersion[0], dispersion[-1])]
95
96			region_masks = []
97			region_matrices = []
98			continuum_masks = []
99			continuum_matrices = []
100			pixel_included_in_regions = np.zeros_like(flux).astype(int)
101			for start, end in regions:
102
103			# Build the masks for this region.
104			si, ei = np.searchsorted(dispersion, (start, end))
105			region_mask = (end >= dispersion) * (dispersion >= start)
106			region_masks.append(region_mask)
107			pixel_included_in_regions[:, region_mask] += 1
108
109			continuum_masks.append(continuum_pixels[
110			(ei >= continuum_pixels) * (continuum_pixels >= si)])
111
112			# Build the design matrices for this region.
113			region_matrices.append(
114			_continuum_design_matrix(dispersion[region_masks[-1]], L, order))
115			continuum_matrices.append(
116			_continuum_design_matrix(dispersion[continuum_masks[-1]], L, order))
117
118			# TODO: ISSUE: Check for overlapping regions and raise an warning.
119
120			# Check for non-zero pixels (e.g. ivar > 0) that are not included in a
121			# region. We should warn about this very loudly!
122			warn_on_pixels = (pixel_included_in_regions == 0) * (ivar > 0)
123
124			metadata = []
125			continuum = np.ones_like(flux) * fill_value
126			for i in range(flux.shape[0]):
127
128			warn_indices = np.where(warn_on_pixels[i])[0]
129			if any(warn_indices):
130			# Split by deltas so that we give useful warning messages.
131			segment_indices = np.where(np.diff(warn_indices) > 1)[0]
132			segment_indices = np.sort(np.hstack(
133			[0, segment_indices, segment_indices + 1, len(warn_indices)]))
134			segment_indices = segment_indices.reshape(-1, 2)
135
136			segments = ", ".join(["{:.1f} to {:.1f} ({:d} pixels)".format(
137			dispersion[s], dispersion[e], e-s) for s, e in segment_indices])
138
139			logging.warn("Some pixels in spectrum index {0} have measured flux "
140			"values (e.g., ivar > 0) but are not included in any "
141			"specified continuum region. These pixels won't be "
142			"continuum-normalised: {1}".format(i, segments))
143
144
145			# Get the flux and inverse variance for this object.
146			object_metadata = []
147			object_flux, object_ivar = (flux[i], ivar[i])
148
149			# Normalize each region.
150			for region_mask, region_matrix, continuum_mask, continuum_matrix in \
151			zip(region_masks, region_matrices, continuum_masks, continuum_matrices):
152			if continuum_mask.size == 0:
153			# Skipping..
154			object_metadata.append([order, L, fill_value, scalar, [], None])
155			continue
156
157			# We will fit to continuum pixels only.
158			continuum_disp = dispersion[continuum_mask]
159			continuum_flux, continuum_ivar \
160			= (object_flux[continuum_mask], object_ivar[continuum_mask])
161
162			# Solve for the amplitudes.
163			M = continuum_matrix
164			MTM = np.dot(M, continuum_ivar[:, None] * M.T)
165			MTy = np.dot(M, (continuum_ivar * continuum_flux).T)
166
167			eigenvalues = np.linalg.eigvalsh(MTM)
168			MTM[np.diag_indices(len(MTM))] += scalar * np.max(eigenvalues)
169			eigenvalues = np.linalg.eigvalsh(MTM)
170			condition_number = max(eigenvalues)/min(eigenvalues)
171
172			amplitudes = np.linalg.solve(MTM, MTy)
173			continuum[i, region_mask] = np.dot(region_matrix.T, amplitudes)
174			object_metadata.append(
175			(order, L, fill_value, scalar, amplitudes, condition_number))
176
177			metadata.append(object_metadata)
178
179			return (continuum, metadata)
180
181
182			def normalize(dispersion, flux, ivar, continuum_pixels, L=1400, order=3,
183			regions=None, fill_value=1.0, **kwargs):
184			"""
185			Pseudo-continuum-normalize the flux using a defined set of continuum pixels
186			and a sum of sine and cosine functions.
187
188			:param dispersion:
189			The dispersion values.
190
191			:param flux:
192			The flux values for all pixels, as they correspond to the `dispersion`
193			array.
194
195			:param ivar:
196			The inverse variances for all pixels, as they correspond to the
197			`dispersion` array.
198
199			:param continuum_pixels:
200			A mask that selects pixels that should be considered as 'continuum'.
201
202			:param L: [optional]
203			The length scale for the sines and cosines.
204
205			:param order: [optional]
206			The number of sine/cosine functions to use in the fit.
207
208			:param regions: [optional]
209			Specify sections of the spectra that should be fitted separately in each
210			star. This may be due to gaps between CCDs, or some other physically-
211			motivated reason. These values should be specified in the same units as
212			the `dispersion`, and should be given as a list of `[(start, end), ...]`
213			values. For example, APOGEE spectra have gaps near the following
214			wavelengths which could be used as `regions`:
215
216			>> regions = ([15090, 15822], [15823, 16451], [16452, 16971])
217
218			:param fill_value: [optional]
219			The continuum value to use for when no continuum was calculated for that
220			particular pixel (e.g., the pixel is outside of the `regions`).
221
222			:param full_output: [optional]
223			If set as True, then a metadata dictionary will also be returned.
224
225			:returns:
226			The continuum values for all pixels, and a dictionary that contains
227			metadata about the fit.
228			"""
229			continuum, metadata = sines_and_cosines(dispersion, flux, ivar, **kwargs)
230
231			normalized_flux = flux/continuum
232			normalized_ivar = continuum * ivar * continuum
233			normalized_flux[normalized_ivar == 0] = 1.0
234
235			return (normalized_flux, normalized_ivar, continuum, metadata)
236
237
238

andycasey / AnniesLasso

Push — master ( cb4176...bf1eb1 )

normalize() A

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like