reduce_point_density() - Code Metrics - Inspection of "Merge pull request #380 from dopplershift/thin-poi..." - Unidata/MetPy - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 42fce5...725411 )

by Ryan

created 2017-04-01 04:12 UTC

reduce_point_density() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	5
c	1
b	0
f	0
dl	0
loc	64
rs	8.2837

How to fix Long Method

# Copyright (c) 2008-2017 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Contains a collection of generally useful calculation tools."""

import numpy as np
import numpy.ma as ma
from scipy.spatial import cKDTree

from ..package_tools import Exporter

exporter = Exporter(globals())


@exporter.export
def resample_nn_1d(a, centers):
    """Return one-dimensional nearest-neighbor indexes based on user-specified centers.

    Parameters
    ----------
    a : array-like
        1-dimensional array of numeric values from which to
        extract indexes of nearest-neighbors
    centers : array-like
        1-dimensional array of numeric values representing a subset of values to approximate

    Returns
    -------
        An array of indexes representing values closest to given array values
    """
    ix = []
    for center in centers:
        index = (np.abs(a - center)).argmin()
        if index not in ix:
            ix.append(index)
    return ix


@exporter.export
def nearest_intersection_idx(a, b):
    """Determine the index of the point just before two lines with common x values.

    Parameters
    ----------
    a : array-like
        1-dimensional array of y-values for line 1
    b : array-like
        1-dimensional array of y-values for line 2

    Returns
    -------
        An array of indexes representing the index of the values
        just before the intersection(s) of the two lines.
    """
    # Difference in the two y-value sets
    difference = a - b

    # Determine the point just before the intersection of the lines
    # Will return multiple points for multiple intersections
    sign_change_idx, = np.nonzero(np.diff(np.sign(difference)))

    return sign_change_idx


@exporter.export
def find_intersections(x, a, b, direction='all'):
    """Calculate the best estimate of intersection.

    Calculates the best estimates of the intersection of two y-value
    data sets that share a common x-value set.

    Parameters
    ----------
    x : array-like
        1-dimensional array of numeric x-values
    a : array-like
        1-dimensional array of y-values for line 1
    b : array-like
        1-dimensional array of y-values for line 2
    direction : string
        specifies direction of crossing. 'all', 'increasing' (a becoming greater than b),
        or 'decreasing' (b becoming greater than a).

    Returns
    -------
        A tuple (x, y) of array-like with the x and y coordinates of the
        intersections of the lines.
    """
    # Find the index of the points just before the intersection(s)
    nearest_idx = nearest_intersection_idx(a, b)
    next_idx = nearest_idx + 1

    # Determine the sign of the change
    sign_change = np.sign(a[next_idx] - b[next_idx])

    # x-values around each intersection
    _, x0 = _next_non_masked_element(x, nearest_idx)
    _, x1 = _next_non_masked_element(x, next_idx)

    # y-values around each intersection for the first line
    _, a0 = _next_non_masked_element(a, nearest_idx)
    _, a1 = _next_non_masked_element(a, next_idx)

    # y-values around each intersection for the second line
    _, b0 = _next_non_masked_element(b, nearest_idx)
    _, b1 = _next_non_masked_element(b, next_idx)

    # Calculate the x-intersection. This comes from finding the equations of the two lines,
    # one through (x0, a0) and (x1, a1) and the other through (x0, b0) and (x1, b1),
    # finding their intersection, and reducing with a bunch of algebra.
    delta_y0 = a0 - b0
    delta_y1 = a1 - b1
    intersect_x = (delta_y1 * x0 - delta_y0 * x1) / (delta_y1 - delta_y0)

    # Calculate the y-intersection of the lines. Just plug the x above into the equation
    # for the line through the a points. One could solve for y like x above, but this
    # causes weirder unit behavior and seems a little less good numerically.
    intersect_y = ((intersect_x - x0) / (x1 - x0)) * (a1 - a0) + a0

    # Make a mask based on the direction of sign change desired
    if direction == 'increasing':
        mask = sign_change > 0
    elif direction == 'decreasing':
        mask = sign_change < 0
    elif direction == 'all':
        return intersect_x, intersect_y
    else:
        raise ValueError('Unknown option for direction: {0}'.format(str(direction)))
    return intersect_x[mask], intersect_y[mask]


@exporter.export
def interpolate_nans(x, y, kind='linear'):
    """Interpolate NaN values in y.

    Interpolate NaN values in the y dimension. Works with unsorted x values.

    Parameters
    ----------
    x : array-like
        1-dimensional array of numeric x-values
    y : array-like
        1-dimensional array of numeric y-values
    kind : string
        specifies the kind of interpolation x coordinate - 'linear' or 'log'

    Returns
    -------
        An array of the y coordinate data with NaN values interpolated.
    """
    x_sort_args = np.argsort(x)
    x = x[x_sort_args]
    y = y[x_sort_args]
    nans = np.isnan(y)
    if kind is 'linear':
        y[nans] = np.interp(x[nans], x[~nans], y[~nans])
    elif kind is 'log':
        y[nans] = np.interp(np.log(x[nans]), np.log(x[~nans]), y[~nans])
    else:
        raise ValueError('Unknown option for kind: {0}'.format(str(kind)))
    return y[x_sort_args]


def _next_non_masked_element(a, idx):
    """Return the next non masked element of a masked array.

    If an array is masked, return the next non-masked element (if the given index is masked).
    If no other unmasked points are after the given masked point, returns none.

    Parameters
    ----------
    a : array-like
        1-dimensional array of numeric values
    idx : integer
        index of requested element

    Returns
    -------
        Index of next non-masked element and next non-masked element
    """
    try:
        next_idx = idx + a[idx:].mask.argmin()
        if ma.is_masked(a[next_idx]):
            return None, None
        else:
            return next_idx, a[next_idx]
    except (AttributeError, TypeError, IndexError):
        return idx, a[idx]


@exporter.export
def reduce_point_density(points, radius, priority=None):
    r"""Return a mask to reduce the density of points in irregularly-spaced data.

    This function is used to down-sample a collection of scattered points (e.g. surface
    data), returning a mask that can be used to select the points from one or more arrays
    (e.g. arrays of temperature and dew point). The points selected can be controlled by
    providing an array of ``priority`` values (e.g. rainfall totals to ensure that
    stations with higher precipitation remain in the mask).

    Parameters
    ----------
    points : (N, K) array-like
        N locations of the points in K dimensional space
    radius : float
        minimum radius allowed between points
    priority : (N, K) array-like, optional
        If given, this should have the same shape as ``points``; these values will
        be used to control selection priority for points.

    Returns
    -------
        (N,) array-like of boolean values indicating whether points should be kept. This
        can be used directly to index numpy arrays to return only the desired points.

    Examples
    --------
    >>> metpy.calc.reduce_point_density(np.array([1, 2, 3]), 1.)
    array([ True, False,  True], dtype=bool)
    >>> metpy.calc.reduce_point_density(np.array([1, 2, 3]), 1.,
            priority=np.array([0.1, 0.9, 0.3]))
    array([False,  True, False], dtype=bool)
    """
    # Handle 1D input
    if points.ndim < 2:
        points = points.reshape(-1, 1)

    # Make a kd-tree to speed searching of data.
    tree = cKDTree(points)

    # Need to use sorted indices rather than sorting the position
    # so that the keep mask matches *original* order.
    if priority is not None:
        # Need to sort the locations in decreasing priority.
        sorted_indices = np.argsort(priority)[::-1]
    else:
        # Take advantage of iterator nature of range here to avoid making big lists
        sorted_indices = range(len(points))

    # Keep all points initially
    keep = np.ones(len(points), dtype=np.bool)

    # Loop over all the potential points
    for ind in sorted_indices:
        # Only proceed if we haven't already excluded this point
        if keep[ind]:
            # Find the neighbors and eliminate them
            neighbors = tree.query_ball_point(points[ind], radius)
            keep[neighbors] = False

            # We just removed ourselves, so undo that
            keep[ind] = True

    return keep


1			# Copyright (c) 2008-2017 MetPy Developers.
2			# Distributed under the terms of the BSD 3-Clause License.
3			# SPDX-License-Identifier: BSD-3-Clause
4			"""Contains a collection of generally useful calculation tools."""
5
6			import numpy as np
7			import numpy.ma as ma
8			from scipy.spatial import cKDTree
9
10			from ..package_tools import Exporter
11
12			exporter = Exporter(globals())
13
14
15			@exporter.export
16			def resample_nn_1d(a, centers):
17			"""Return one-dimensional nearest-neighbor indexes based on user-specified centers.
18
19			Parameters
20			----------
21			a : array-like
22			1-dimensional array of numeric values from which to
23			extract indexes of nearest-neighbors
24			centers : array-like
25			1-dimensional array of numeric values representing a subset of values to approximate
26
27			Returns
28			-------
29			An array of indexes representing values closest to given array values
30			"""
31			ix = []
32			for center in centers:
33			index = (np.abs(a - center)).argmin()
34			if index not in ix:
35			ix.append(index)
36			return ix
37
38
39			@exporter.export
40			def nearest_intersection_idx(a, b):
41			"""Determine the index of the point just before two lines with common x values.
42
43			Parameters
44			----------
45			a : array-like
46			1-dimensional array of y-values for line 1
47			b : array-like
48			1-dimensional array of y-values for line 2
49
50			Returns
51			-------
52			An array of indexes representing the index of the values
53			just before the intersection(s) of the two lines.
54			"""
55			# Difference in the two y-value sets
56			difference = a - b
57
58			# Determine the point just before the intersection of the lines
59			# Will return multiple points for multiple intersections
60			sign_change_idx, = np.nonzero(np.diff(np.sign(difference)))
61
62			return sign_change_idx
63
64
65			@exporter.export
66			def find_intersections(x, a, b, direction='all'):
67			"""Calculate the best estimate of intersection.
68
69			Calculates the best estimates of the intersection of two y-value
70			data sets that share a common x-value set.
71
72			Parameters
73			----------
74			x : array-like
75			1-dimensional array of numeric x-values
76			a : array-like
77			1-dimensional array of y-values for line 1
78			b : array-like
79			1-dimensional array of y-values for line 2
80			direction : string
81			specifies direction of crossing. 'all', 'increasing' (a becoming greater than b),
82			or 'decreasing' (b becoming greater than a).
83
84			Returns
85			-------
86			A tuple (x, y) of array-like with the x and y coordinates of the
87			intersections of the lines.
88			"""
89			# Find the index of the points just before the intersection(s)
90			nearest_idx = nearest_intersection_idx(a, b)
91			next_idx = nearest_idx + 1
92
93			# Determine the sign of the change
94			sign_change = np.sign(a[next_idx] - b[next_idx])
95
96			# x-values around each intersection
97			_, x0 = _next_non_masked_element(x, nearest_idx)
98			_, x1 = _next_non_masked_element(x, next_idx)
99
100			# y-values around each intersection for the first line
101			_, a0 = _next_non_masked_element(a, nearest_idx)
102			_, a1 = _next_non_masked_element(a, next_idx)
103
104			# y-values around each intersection for the second line
105			_, b0 = _next_non_masked_element(b, nearest_idx)
106			_, b1 = _next_non_masked_element(b, next_idx)
107
108			# Calculate the x-intersection. This comes from finding the equations of the two lines,
109			# one through (x0, a0) and (x1, a1) and the other through (x0, b0) and (x1, b1),
110			# finding their intersection, and reducing with a bunch of algebra.
111			delta_y0 = a0 - b0
112			delta_y1 = a1 - b1
113			intersect_x = (delta_y1 * x0 - delta_y0 * x1) / (delta_y1 - delta_y0)
114
115			# Calculate the y-intersection of the lines. Just plug the x above into the equation
116			# for the line through the a points. One could solve for y like x above, but this
117			# causes weirder unit behavior and seems a little less good numerically.
118			intersect_y = ((intersect_x - x0) / (x1 - x0)) * (a1 - a0) + a0
119
120			# Make a mask based on the direction of sign change desired
121			if direction == 'increasing':
122			mask = sign_change > 0
123			elif direction == 'decreasing':
124			mask = sign_change < 0
125			elif direction == 'all':
126			return intersect_x, intersect_y
127			else:
128			raise ValueError('Unknown option for direction: {0}'.format(str(direction)))
129			return intersect_x[mask], intersect_y[mask]
130
131
132			@exporter.export
133			def interpolate_nans(x, y, kind='linear'):
134			"""Interpolate NaN values in y.
135
136			Interpolate NaN values in the y dimension. Works with unsorted x values.
137
138			Parameters
139			----------
140			x : array-like
141			1-dimensional array of numeric x-values
142			y : array-like
143			1-dimensional array of numeric y-values
144			kind : string
145			specifies the kind of interpolation x coordinate - 'linear' or 'log'
146
147			Returns
148			-------
149			An array of the y coordinate data with NaN values interpolated.
150			"""
151			x_sort_args = np.argsort(x)
152			x = x[x_sort_args]
153			y = y[x_sort_args]
154			nans = np.isnan(y)
155			if kind is 'linear':
156			y[nans] = np.interp(x[nans], x[~nans], y[~nans])
157			elif kind is 'log':
158			y[nans] = np.interp(np.log(x[nans]), np.log(x[~nans]), y[~nans])
159			else:
160			raise ValueError('Unknown option for kind: {0}'.format(str(kind)))
161			return y[x_sort_args]
162
163
164			def _next_non_masked_element(a, idx):
165			"""Return the next non masked element of a masked array.
166
167			If an array is masked, return the next non-masked element (if the given index is masked).
168			If no other unmasked points are after the given masked point, returns none.
169
170			Parameters
171			----------
172			a : array-like
173			1-dimensional array of numeric values
174			idx : integer
175			index of requested element
176
177			Returns
178			-------
179			Index of next non-masked element and next non-masked element
180			"""
181			try:
182			next_idx = idx + a[idx:].mask.argmin()
183			if ma.is_masked(a[next_idx]):
184			return None, None
185			else:
186			return next_idx, a[next_idx]
187			except (AttributeError, TypeError, IndexError):
188			return idx, a[idx]
189
190
191			@exporter.export
192			def reduce_point_density(points, radius, priority=None):
193			r"""Return a mask to reduce the density of points in irregularly-spaced data.
194
195			This function is used to down-sample a collection of scattered points (e.g. surface
196			data), returning a mask that can be used to select the points from one or more arrays
197			(e.g. arrays of temperature and dew point). The points selected can be controlled by
198			providing an array of ``priority`` values (e.g. rainfall totals to ensure that
199			stations with higher precipitation remain in the mask).
200
201			Parameters
202			----------
203			points : (N, K) array-like
204			N locations of the points in K dimensional space
205			radius : float
206			minimum radius allowed between points
207			priority : (N, K) array-like, optional
208			If given, this should have the same shape as ``points``; these values will
209			be used to control selection priority for points.
210
211			Returns
212			-------
213			(N,) array-like of boolean values indicating whether points should be kept. This
214			can be used directly to index numpy arrays to return only the desired points.
215
216			Examples
217			--------
218			>>> metpy.calc.reduce_point_density(np.array([1, 2, 3]), 1.)
219			array([ True, False, True], dtype=bool)
220			>>> metpy.calc.reduce_point_density(np.array([1, 2, 3]), 1.,
221			priority=np.array([0.1, 0.9, 0.3]))
222			array([False, True, False], dtype=bool)
223			"""
224			# Handle 1D input
225			if points.ndim < 2:
226			points = points.reshape(-1, 1)
227
228			# Make a kd-tree to speed searching of data.
229			tree = cKDTree(points)
230
231			# Need to use sorted indices rather than sorting the position
232			# so that the keep mask matches original order.
233			if priority is not None:
234			# Need to sort the locations in decreasing priority.
235			sorted_indices = np.argsort(priority)[::-1]
236			else:
237			# Take advantage of iterator nature of range here to avoid making big lists
238			sorted_indices = range(len(points))
239
240			# Keep all points initially
241			keep = np.ones(len(points), dtype=np.bool)
242
243			# Loop over all the potential points
244			for ind in sorted_indices:
245			# Only proceed if we haven't already excluded this point
246			if keep[ind]:
247			# Find the neighbors and eliminate them
248			neighbors = tree.query_ball_point(points[ind], radius)
249			keep[neighbors] = False
250
251			# We just removed ourselves, so undo that
252			keep[ind] = True
253
254			return keep
255

Unidata / MetPy

Push — master ( 42fce5...725411 )

reduce_point_density() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like