terminator() - Code Metrics - Inspection of "Merge pull request #79 from andycasey/refactor" - andycasey/AnniesLasso - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( b6035d...10c423 )

by Andy

created 2018-01-28 09:56 UTC

terminator() F

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	1	Features	0

Metric	Value
cc	11
c	1
b	1
f	0
dl	0
loc	44
rs	3.1764

How to fix Complexity

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
A polynomial vectorizer for The Cannon.
"""

from __future__ import (division, print_function, absolute_import,
                        unicode_literals)

__all__ = ["PolynomialVectorizer"]

import numpy as np
from collections import (Counter, OrderedDict)
from itertools import combinations_with_replacement
from six import string_types

from .base import BaseVectorizer


class PolynomialVectorizer(BaseVectorizer):
    """
    A vectorizer that models spectral fluxes as combination of polynomial terms.
    Note that either `label_names` *and* `order` must be provided, or the `terms`
    keyword argument needs to be explicitly specified.


    :param label_names: [optional]
        A list of label names that are terms in the label vector. If `terms`
        is given without `label_names`, the label names are taken from the
        terms themselves.

    :param order: [optional]
        The maximal order for the vectorizer.

    :param terms: [optional]
        A structured list of tuples (or a human-readable description) that
        defines the full extent of the label vector. Note that `order` *must*
        be `None` if `terms` is provided.

    :raises ValueError:
        If `terms` is not given and either `label_names` or `order` is
        missing, or if both `terms` and `order` are given.
    """

    def __init__(self, label_names=None, order=None, terms=None, **kwargs):

        # Check to see if we have a terms/(label_names and order) dichotomy.
        # Identity checks are used on purpose: `None in (label_names, order)`
        # compares with `==`, which is evaluated element-wise (and can raise)
        # when `label_names` is an array rather than a list.
        if terms is None:
            conflicting = label_names is None or order is None
        else:
            conflicting = order is not None

        if conflicting:
            raise ValueError("order must be None if terms are provided, "
                "and terms must be None if label_names and order are provided")

        if terms is None:
            # Build a human-readable description from the names and order.
            terms = terminator(label_names, order, **kwargs)

        elif label_names is None:
            # Parse label names from the terms.
            label_names = get_label_names(parse_label_vector_description(terms))

        # Convert terms to use indices.
        terms = parse_label_vector_description(terms, label_names=label_names)

        super(PolynomialVectorizer, self).__init__(
            label_names=label_names, terms=terms, **kwargs)


    def get_label_vector(self, labels):
        """
        Return the values of the label vector, given the scaled labels.

        :param labels:
            The scaled and offset labels to use to calculate the label vector(s).
            This can be a one-dimensional vector of `K` labels, or a
            two-dimensional array of `N` by `K` labels.

        :returns:
            An array of shape `(1 + T, N)`, where `T` is the number of terms.
            The first row is all ones; each following row is the product of
            `label**power` over the (index, power) pairs of one term.

        :raises ValueError:
            If `labels` has more than two dimensions.
        """

        labels = np.atleast_2d(labels)
        if labels.ndim > 2:
            raise ValueError("labels must be a 1-d or 2-d array")

        columns = [np.ones(labels.shape[0], dtype=float)]
        for term in self.terms:
            column = 1. # This works; don't use np.multiply/np.product.
            for index, order in term:
                column *= labels[:, index]**order
            columns.append(column)
        return np.vstack(columns)


    def get_label_vector_derivative(self, labels):
        """
        Return the derivatives of the label vector with respect to each label.

        :param labels:
            The scaled labels to calculate the label vector derivatives, given
            as a vector of `L` labels (using the same order and length as
            self.label_names).
            NOTE(review): the labels are read as `labels[index]`, so this
            implementation appears to expect a one-dimensional vector only;
            confirm before passing a two-dimensional array.

        :returns:
            An array of shape `(T + 1, L)`, where `T` is the number of terms.
            Entry `[t, k]` holds the derivative of term `t` with respect to
            label `k`; the first row (the constant term) is all zeros.
        """

        L, T = (len(labels), len(self.terms))

        slicer = np.arange(L)
        indices_used = np.zeros(L, dtype=bool)

        columns = np.ones((T + 1, L), dtype=float)
        columns[0] = 0.0 # First theta derivative always zero.

        for t, term in enumerate(self.terms, start=1):

            # Track which labels appear in this term; the derivative with
            # respect to every other label is zero.
            indices_used[:] = False

            for index, order in term:

                dy = order * (labels[index]**(order - 1))
                y = labels[index]**order

                # If it's the index w.r.t. it, take derivative.
                columns[t, index] *= dy

                # Otherwise, calculate as normal.
                columns[t, slicer != index] *= y
                indices_used[index] = True

            columns[t, ~indices_used] = 0

        return columns


    def get_human_readable_label_vector(self, mul="*", pow="^", bracket=False):
        r"""
        Return a human-readable form of the label vector.

        :param mul: [optional]
            String to use to represent a multiplication operator. For example,
            if giving LaTeX label definitions one may want to use '\cdot' for
            the `mul` term.

        :param pow: [optional]
            String to use to represent a power operator.

        :param bracket: [optional]
            Show brackets around each term.

        :returns:
            A human-readable string representing the label vector.
        """
        return human_readable_label_vector(
            self.terms, self.label_names, mul=mul, pow=pow, bracket=bracket)


    @property
    def human_readable_label_vector(self):
        """
        Return a human-readable form of the label vector, using the default
        operators.
        """
        return self.get_human_readable_label_vector()


    def get_human_readable_label_term(self, term_index, label_names=None,
        **kwargs):
        """
        Return a human-readable form of a single term in the label vector.

        :param term_index:
            The term in the label vector to return. Index zero is the constant
            term, so index `i` refers to `self.terms[i - 1]`.

        :param label_names: [optional]
            The label names to use. For example, these could be LaTeX
            representations of the label names. Defaults to self.label_names.

        :returns:
            A human-readable string representing a single term in the label vector.
        """

        if term_index == 0: return "1"
        else:
            return human_readable_label_term(self.terms[term_index - 1],
                label_names=label_names or self.label_names, **kwargs)


def _is_structured_label_vector(label_vector):
    """
    Return whether the provided label vector is structured as a polynomial
    vector description appropriately or not.

    :param label_vector:
        A structured or unstructured description of a polynomial label vector.
    """
    if not isinstance(label_vector, (list, tuple)):
        return False

    for descriptor in label_vector:
        if not isinstance(descriptor, (list, tuple)):
            return False

        for term in descriptor:
            if not isinstance(term, (list, tuple)) \
            or len(term) != 2 \
            or not isinstance(term[-1], (int, float)):
                return False

    if len(label_vector) == 0 or sum(map(len, label_vector)) == 0:
        return False
    return True


def parse_label_vector_description(description, label_names=None, **kwargs):
    """
    Return a structured form of a label vector from unstructured,
    human-readable input.

    :param description:
        A human-readable or structured form of a label vector. Input that is
        already structured is not re-parsed.

    :type description:
        str or list

    :param label_names: [optional]
        If `label_names` are provided, instead of label names being provided as 
        the output parameter, the corresponding index location will be given.
        This also applies to structured input whose labels are given by name.

    :returns:
        A structured form of the label vector as a multi-level list. Labels
        raised to the power zero are dropped, as are terms left empty by that.

    :raises ValueError:
        If a power is not finite, if no valid terms remain, or if a label is
        not present in `label_names`.


    :Example:

    >>> parse_label_vector_description(
    ...     "Teff^4 + logg*Teff^3 + feh + feh^0*Teff")
    [[('Teff', 4.0)], [('logg', 1), ('Teff', 3.0)], [('feh', 1)], [('Teff', 1)]]
    """

    # Materialise the names once so that lookups do not rebuild the list.
    names = None if label_names is None else list(label_names)

    if _is_structured_label_vector(description):
        if names is None:
            return description

        # Structured input may still refer to labels by name; honour the
        # request for indices. Labels that are not strings (i.e., already
        # indices) are passed through untouched.
        return [
            [(names.index(label) if isinstance(label, string_types) else label,
              power) for label, power in term]
            for term in description
        ]

    # Allow for custom characters, but don't advertise it.
    # (Astronomers have bad enough habits already.)
    kwds = dict(zip(("sep", "mul", "pow"), "+*^"))
    kwds.update(kwargs)
    sep, mul, pow = (kwds[k] for k in ("sep", "mul", "pow"))

    if isinstance(description, string_types):
        description = description.split(sep)
    description = [_.strip() for _ in description]

    # Functions to parse the parameter (or index) and order for each term.
    def get_power(token):
        # A token without a power operator has an implicit power of one.
        return float(token.split(pow)[1].strip()) if pow in token else 1

    def get_label(token):
        name = token.split(pow)[0].strip()
        return name if names is None else names.index(name)

    label_vector = []
    for descriptor in (item.split(mul) for item in description):

        term = OrderedDict()
        for token in descriptor:
            label = get_label(token)
            term[label] = term.get(label, 0) + get_power(token) # Sum repeat term powers.

        # Prevent uses of x^0 etc clogging up the label vector.
        valid_terms = [(l, o) for l, o in term.items() if o != 0]
        if not np.all(np.isfinite([o for l, o in valid_terms])):
            raise ValueError("non-finite power provided")

        if len(valid_terms) > 0:
            label_vector.append(valid_terms)
    
    if sum(map(len, label_vector)) == 0:
        raise ValueError("no valid terms provided")

    return label_vector


def human_readable_label_term(term, label_names=None, mul="*", pow="^",
    bracket=False):
    r"""
    Return a human-readable form of a single term in the label vector.

    :param term:
        A structured term: an iterable of (label, power) pairs, where the
        label is either a name or an integer index into `label_names`.

    :param label_names: [optional]
        The names for each label in the label vector. When given, integer
        labels are replaced by the name at that index.

    :param mul: [optional]
        String to use to represent a multiplication operator. For example,
        if giving LaTeX label definitions one may want to use '\cdot' for
        the `mul` term.

    :param pow: [optional]
        String to use to represent a power operator.

    :param bracket: [optional]
        Show brackets around the term when it has more than one factor.

    :returns:
        A human-readable string representing the term.
    """
    factors = []
    for label, power in term:
        if isinstance(label, int) and label_names is not None:
            label = label_names[label]

        # Always work with text so that bare integer labels can be joined.
        name = "{0}".format(label)

        if power == 1:
            factors.append(name)
        else:
            # Every power other than one is shown, including fractional and
            # negative powers. The general format drops a trailing ".0" but
            # keeps the significant digits of non-integer powers.
            factors.append("{0}{1}{2:g}".format(name, pow, power))

    joined = mul.join(factors)
    if bracket and len(factors) > 1:
        return "({})".format(joined)
    return joined


def human_readable_label_vector(terms, label_names=None, mul="*", pow="^",
    bracket=False):
    r"""
    Return a human-readable form of the label vector.

    :param terms:
        The structured terms of the label vector.

    :param label_names: [optional]
        The names for each label in the label vector.

    :param mul: [optional]
        String to use to represent a multiplication operator. For example,
        if giving LaTeX label definitions one may want to use '\cdot' for
        the `mul` term.

    :param pow: [optional]
        String to use to represent a power operator.

    :param bracket: [optional]
        Show brackets around each term.

    :returns:
        A human-readable string representing the label vector. The string
        always starts with "1", followed by one entry per term.

    :raises TypeError:
        If `terms` is not a list or tuple.
    """
    if not isinstance(terms, (list, tuple)):
        raise TypeError("label vector is not a structured set of terms")

    human_terms = ["1"]
    # Forward `bracket` too, otherwise the option has no effect.
    human_terms.extend(
        human_readable_label_term(term, label_names=label_names, mul=mul,
            pow=pow, bracket=bracket)
        for term in terms)
    return " + ".join(human_terms)


def terminator(label_names, order, cross_term_order=-1, **kwargs):
    """
    Create the terms required for a label vector description based on the label
    names provided and the order given.

    :param label_names:
        The names of the labels to use in describing the label vector.

    :param order:
        The maximum order of the terms (e.g., order 3 implies A^3 is a term).

    :param cross_term_order: [optional]
        The maximum order of the cross-terms (e.g., cross_term_order 2 implies
        A^2*B is a term). If the provided `cross_term_order` value is negative, 
        then `cross_term_order = order - 1` will be assumed.

    :param sep: [optional]
        The operator to use to separate terms in the description of the label
        vector.

    :param mul: [optional]
        The operator to use to represent multiplication in the description of 
        the label vector.

    :param pow: [optional]
        The operator to use to represent exponents in the description of the
        label vector.

    :returns:
        A human-readable form of the label vector. Any other keyword
        arguments are ignored.
    """
    # Look each operator up by its own key. Popping a list of keys only
    # worked while `kwargs` was empty (a list is unhashable), so supplying
    # any keyword argument used to raise a TypeError.
    sep = kwargs.get("sep", "+")
    mul = kwargs.get("mul", "*")
    power_operator = kwargs.get("pow", "^")

    if cross_term_order < 0:
        cross_term_order = order - 1

    items = []
    for degree in range(1, 1 + max(order, 1 + cross_term_order)):
        for combination in combinations_with_replacement(label_names, degree):
            powers = Counter(combination)

            # Single-label terms are limited by `order`; terms that mix
            # several labels are limited by `cross_term_order`.
            limit = order if len(powers) == 1 else cross_term_order
            if max(powers.values()) > limit:
                continue

            # Python 2 and 3 order the counter differently, so write the
            # factors of each term in sorted label order.
            factors = []
            for label in sorted(powers):
                power = powers[label]
                if power > 1:
                    factors.append(
                        "{0}{1}{2}".format(label, power_operator, power))
                else:
                    factors.append("{0}".format(label))
            items.append(mul.join(factors))

    return " {} ".format(sep).join(items)


def get_label_names(label_vector):
    """
    Collect the distinct labels used by a structured label vector
    description, in order of first appearance.

    :param label_vector:
        A structured description of the label vector.

    :returns:
        A list of the label names that make up the label vector. Labels that
        only ever appear with a power of zero are left out.
    """
    seen = OrderedDict()
    for term in label_vector:
        for label, power in term:
            if power != 0:
                # Only the first occurrence fixes the position of a label.
                seen.setdefault(label, None)
    return list(seen)


1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3
4			"""
5			A polynomial vectorizer for The Cannon.
6			"""
7
8			from __future__ import (division, print_function, absolute_import,
9			unicode_literals)
10
11			__all__ = ["PolynomialVectorizer"]
12
13			import numpy as np
14			from collections import (Counter, OrderedDict)
15			from itertools import combinations_with_replacement
16			from six import string_types
17
18			from .base import BaseVectorizer
19
20
21			class PolynomialVectorizer(BaseVectorizer):
22			"""
23			A vectorizer that models spectral fluxes as combination of polynomial terms.
24			Note that either `label_names` and `order` must be provided, or the `terms`
25			keyword argument needs to be explicitly specified.
26
27
28			:param label_names: [optional]
29			A list of label names that are terms in the label vector.
30
31			:param order: [optional]
32			The maximal order for the vectorizer.
33
34			:param terms: [optional]
35			A structured list of tuples that defines the full extent of the label
36			vector. Note that `terms` must be `None` if `label_names` or `order`
37			are provided.
38			"""
39
40			def __init__(self, label_names=None, order=None, terms=None, **kwargs):
41
42			# Check to see if we have a terms/(label_names and order) dichotamy/
43			if (terms is None and None in (label_names, order)) \
44			or (terms is not None and order is not None):
45			raise ValueError("order must be None if terms are provided, "
46			"and terms must be None if label_names and order are provided")
47
48			if terms is None:
49			# Parse human-readable terms.
50			terms = terminator(label_names, order, **kwargs)
51
52			elif label_names is None:
53			# Parse label names from the terms.
54			label_names = get_label_names(parse_label_vector_description(terms))
55
56			# Convert terms to use indices.
57			terms = parse_label_vector_description(terms, label_names=label_names)
58
59			super(PolynomialVectorizer, self).__init__(
60			label_names=label_names, terms=terms, **kwargs)
61			return None
62
63
64			def get_label_vector(self, labels):
65			"""
66			Return the values of the label vector, given the scaled labels.
67
68			:param labels:
69			The scaled and offset labels to use to calculate the label vector(s).
70			This can be a ond-dimensional vector of `K` labels, or a
71			two-dimensional array of `N` by `K` labels.
72			"""
73
74			labels = np.atleast_2d(labels)
75			if labels.ndim > 2:
76			raise ValueError("labels must be a 1-d or 2-d array")
77
78			columns = [np.ones(labels.shape[0], dtype=float)]
79			for term in self.terms:
80			column = 1. # This works; don't use np.multiply/np.product.
81			for index, order in term:
82			column = labels[:, index]*order
83			columns.append(column)
84			return np.vstack(columns)
85
86
87			def get_label_vector_derivative(self, labels):
88			"""
89			Return the derivatives of the label vector with respect to fluxes.
90
91			:param labels:
92			The scaled labels to calculate the label vector derivatives. This can
93			be a one-dimensional vector of `K` labels (using the same order and
94			length provided by self.label_names), or a two-dimensional array of
95			`N` by `K` values. The returning array will be of shape `(N, D)`,
96			where `D` is the number of terms in the label vector description.
97			"""
98
99			L, T = (len(labels), len(self.terms))
100
101			slicer = np.arange(L)
102			indices_used = np.zeros(L, dtype=bool)
103
104			columns = np.ones((T + 1, L), dtype=float)
105			columns[0] = 0.0 # First theta derivative always zero.
106
107			for t, term in enumerate(self.terms, start=1):
108
109			indices_used[:] = False
110
111			for index, order in term:
112
113			dy = order * (labels[index]**(order - 1))
114			y = labels[index]**order
115
116			# If it's the index w.r.t. it, take derivative.
117			columns[t, index] *= dy
118
119			# Otherwise, calculate as normal.
120			columns[t, slicer != index] *= y
121			indices_used[index] = True
122
123			columns[t, ~indices_used] = 0
124
125			return columns
126
127
128			def get_human_readable_label_vector(self, mul="*", pow="^", bracket=False):
129			"""
130			Return a human-readable form of the label vector.
131
132			:param mul: [optional]
133			String to use to represent a multiplication operator. For example,
134			if giving LaTeX label definitions one may want to use '\cdot' for
135			the `mul` term.
136
137			:param pow: [optional]
138			String to use to represent a power operator.
139
140			:param bracket: [optional]
141			Show brackets around each term.
142
143			:returns:
144			A human-readable string representing the label vector.
145			"""
146			return human_readable_label_vector(
147			self.terms, self.label_names, mul=mul, pow=pow, bracket=bracket)
148
149
150			@property
151			def human_readable_label_vector(self):
152			"""
153			Return a human-readable form of the label vector.
154			"""
155			return self.get_human_readable_label_vector()
156
157
158			def get_human_readable_label_term(self, term_index, label_names=None,
159			**kwargs):
160			"""
161			Return a human-readable form of a single term in the label vector.
162
163			:param term_index:
164			The term in the label vector to return.
165
166			:param label_names: [optional]
167			The label names to use. For example, these could be LaTeX
168			representations of the label names.
169
170			:returns:
171			A human-readable string representing a single term in the label vector.
172			"""
173
174			if term_index == 0: return "1"
175			else:
176			return human_readable_label_term(self.terms[term_index - 1],
177			label_names=label_names or self.label_names, **kwargs)
178
179
180			def _is_structured_label_vector(label_vector):
181			"""
182			Return whether the provided label vector is structured as a polynomial
183			vector description appropriately or not.
184
185			:param label_vector:
186			A structured or unstructured description of a polynomial label vector.
187			"""
188			if not isinstance(label_vector, (list, tuple)):
189			return False
190
191			for descriptor in label_vector:
192			if not isinstance(descriptor, (list, tuple)):
193			return False
194
195			for term in descriptor:
196			if not isinstance(term, (list, tuple)) \
197			or len(term) != 2 \
198			or not isinstance(term[-1], (int, float)):
199			return False
200
201			if len(label_vector) == 0 or sum(map(len, label_vector)) == 0:
202			return False
203			return True
204
205
206			def parse_label_vector_description(description, label_names=None, **kwargs):
207			"""
208			Return a structured form of a label vector from unstructured,
209			human-readable input.
210
211			:param description:
212			A human-readable or structured form of a label vector.
213
214			:type description:
215			str or list
216
217			:param label_names: [optional]
218			If `label_names` are provided, instead of label names being provided as
219			the output parameter, the corresponding index location will be given.
220
221			:returns:
222			A structured form of the label vector as a multi-level list.
223
224
225			:Example:
226
227			>>> parse_label_vector("Teff^4 + loggTeff^3 + feh + feh^0Teff")
228			[
229			[
230			("Teff", 4),
231			],
232			[
233			("logg", 1),
234			("Teff", 3)
235			],
236			[
237			("feh", 1),
238			],
239			[
240			("feh", 0),
241			("Teff", 1)
242			]
243			]
244			"""
245
246			if _is_structured_label_vector(description):
247			return description
248
249			# Allow for custom characters, but don't advertise it.
250			# (Astronomers have bad enough habits already.)
251			kwds = dict(zip(("sep", "mul", "pow"), "+*^"))
252			kwds.update(kwargs)
253			sep, mul, pow = (kwds[k] for k in ("sep", "mul", "pow"))
254
255			if isinstance(description, string_types):
256			description = description.split(sep)
257			description = [_.strip() for _ in description]
258
259			# Functions to parse the parameter (or index) and order for each term.
260			get_power = lambda t: float(t.split(pow)[1].strip()) if pow in t else 1
261			if label_names is None:
262			get_label = lambda d: d.split(pow)[0].strip()
263			else:
264			get_label = lambda d: list(label_names).index(d.split(pow)[0].strip())
265
266			label_vector = []
267			for descriptor in (item.split(mul) for item in description):
268
269			labels = map(get_label, descriptor)
270			orders = map(get_power, descriptor)
271
272			term = OrderedDict()
273			for label, order in zip(labels, orders):
274			term[label] = term.get(label, 0) + order # Sum repeat term powers.
275
276			# Prevent uses of x^0 etc clogging up the label vector.
277			valid_terms = [(l, o) for l, o in term.items() if o != 0]
278			if not np.all(np.isfinite([o for l, o in valid_terms])):
279			raise ValueError("non-finite power provided")
280
281			if len(valid_terms) > 0:
282			label_vector.append(valid_terms)
283
284			if sum(map(len, label_vector)) == 0:
285			raise ValueError("no valid terms provided")
286
287			return label_vector
288
289
290			def human_readable_label_term(term, label_names=None, mul="*", pow="^",
291			bracket=False):
292			"""
293			Return a human-readable form of a single term in the label vector.
294
295			:param term:
296			A structured term.
297
298			:param label_names: [optional]
299			The names for each label in the label vector.
300
301			:param mul: [optional]
302			String to use to represent a multiplication operator. For example,
303			if giving LaTeX label definitions one may want to use '\cdot' for
304			the `mul` term.
305
306			:param pow: [optional]
307			String to use to represent a power operator.
308
309			:param bracket: [optional]
310			Show brackets around each term.
311
312			:returns:
313			A human-readable string representing the label vector.
314			"""
315			ct = []
316			for i, o in term:
317			if isinstance(i, int) and label_names is not None:
318			label_name = label_names[i]
319			else:
320			label_name = i
321			if o > 1:
322			d = (0, 1)[o - int(o) > 0]
323			ct.append("{0}{1}{2:.{3}f}".format(label_name, pow, o, d))
324			else:
325			ct.append(label_name)
326
327			if bracket and len(ct) > 1:
328			return "({})".format(mul.join(ct))
329			else:
330			return mul.join(ct)
331
332
333			def human_readable_label_vector(terms, label_names=None, mul="*", pow="^",
334			bracket=False):
335			"""
336			Return a human-readable form of the label vector.
337
338			:param terms:
339			The structured terms of the label vector.
340
341			:param label_names: [optional]
342			The names for each label in the label vector.
343
344			:param mul: [optional]
345			String to use to represent a multiplication operator. For example,
346			if giving LaTeX label definitions one may want to use '\cdot' for
347			the `mul` term.
348
349			:param pow: [optional]
350			String to use to represent a power operator.
351
352			:param bracket: [optional]
353			Show brackets around each term.
354
355			:returns:
356			A human-readable string representing the label vector.
357			"""
358			if not isinstance(terms, (list, tuple)):
359			raise TypeError("label vector is not a structured set of terms")
360
361			human_terms = ["1"]
362			for term in terms:
363			human_terms.append(human_readable_label_term(
364			term, label_names=label_names, mul=mul, pow=pow))
365			return " + ".join(human_terms)
366
367
368			def terminator(label_names, order, cross_term_order=-1, **kwargs):
369			"""
370			Create the terms required for a label vector description based on the label
371			names provided and the order given.
372
373			:param label_names:
374			The names of the labels to use in describing the label vector.
375
376			:param order:
377			The maximum order of the terms (e.g., order 3 implies A^3 is a term).
378
379			:param cross_term_order: [optional]
380			The maximum order of the cross-terms (e.g., cross_term_order 2 implies
381			A^2*B is a term). If the provided `cross_term_order` value is negative,
382			then `cross_term_order = order - 1` will be assumed.
383
384			:param mul: [optional]
385			The operator to use to represent multiplication in the description of
386			the label vector.
387
388			:param pow: [optional]
389			The operator to use to represent exponents in the description of the
390			label vector.
391
392			:returns:
393			A human-readable form of the label vector.
394			"""
395			sep, mul, pow = kwargs.pop(["sep", "mul", "pow"], "+*^")
396
397			#I make no apologies: it's fun to code like this for short complex functions
398			items = []
399			if 0 > cross_term_order:
400			cross_term_order = order - 1
401
402			for o in range(1, 1 + max(order, 1 + cross_term_order)):
403			for t in map(Counter, combinations_with_replacement(label_names, o)):
404			# Python 2 and 3 behave differently here, so generate an ordered
405			# dictionary based on sorting the keys.
406			t = OrderedDict([(k, t[k]) for k in sorted(t.keys())])
407			if len(t) == 1 and order >= max(t.values()) \
408			or len(t) > 1 and cross_term_order >= max(t.values()):
409			c = [pow.join([[l], [l, str(p)]][p > 1]) for l, p in t.items()]
410			if c: items.append(mul.join(map(str, c)))
411			return " {} ".format(sep).join(items)
412
413
414			def get_label_names(label_vector):
415			"""
416			Return the label names that contribute to the structured label vector
417			description provided.
418
419			:param label_vector:
420			A structured description of the label vector.
421
422			:returns:
423			A list of the label names that make up the label vector.
424			"""
425			return list(OrderedDict.fromkeys([label for term in label_vector \
426			for label, power in term if power != 0]))
427

andycasey / AnniesLasso

Push — master ( b6035d...10c423 )

terminator() F

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like