Issues in aggregation.py (master) - Issues in master - mila-udem/blocks - Measure and Improve Code Quality continuously with Scrutinizer

Issues (119)

blocks/monitoring/aggregation.py (6 issues)

Labels

Severity

"""Evaluate Theano variables on auxiliary data and during training."""
from functools import partial
import logging
from abc import ABCMeta, abstractmethod

from six import add_metaclass
from theano import tensor
from theano.ifelse import ifelse

from blocks.utils import shared_like

logger = logging.getLogger(__name__)


@add_metaclass(ABCMeta)
class AggregationScheme(object):
    """Recipe for evaluating a Theano variable across many minibatches.

    A scheme acts as a factory for :class:`Aggregator` objects. Each
    aggregator combines partial results computed on individual batches
    into the value of the variable over a whole dataset.

    To use a scheme, attach it to the Theano variable that computes the
    per-batch value through the ``aggregation_scheme`` tag.

    Parameters
    ----------
    variable : :class:`~tensor.TensorVariable`
        The variable that holds the desired value on a single batch.

    """
    def __init__(self, variable):
        # Kept as a plain attribute so subclasses can build their
        # aggregators from it.
        self.variable = variable

    @abstractmethod
    def get_aggregator(self):
        """Build and return a new Aggregator for this variable."""


class Aggregator(object):
    """Incremental evaluator of a Theano variable over a dataset.

    .. warning::
        Do not instantiate Aggregators directly; obtain them from
        :meth:`AggregationScheme.get_aggregator` instead.

    Typical uses include:

    * averaging a value over examples, sequence lengths etc.
    * tracking a parameter of a model
    * monitoring a penalty

    An Aggregator keeps a set of Theano shared variables called
    accumulators. It describes how they are initialized, how they are
    updated with each incremental calculation, and exposes a Theano
    variable that reads the accumulators to compute the final value.

    Parameters
    ----------
    aggregation_scheme : :class:`AggregationScheme`
        The aggregation scheme that constructed this Aggregator
    initialization_updates : list of Theano updates
        Updates that specify how to initialize shared variables of
        this Aggregator. *Can only refer to shared variables and
        constants.* Defaults to an empty list.
    accumulation_updates : list of Theano updates
        Updates that specify how a new batch of data gets processed
        by this Aggregator. *Can refer to model inputs.* Defaults to an
        empty list.
    readout_variable : :class:`~tensor.TensorVariable`
        Theano variable that holds the final value based on aggregated
        partial results. *readout_variable must only consist of shared
        variables and constants.*

    Attributes
    ----------
    All constructor parameters are accessible as attributes.

    """
    def __init__(self, aggregation_scheme, initialization_updates=None,
                 accumulation_updates=None, readout_variable=None):
        self.aggregation_scheme = aggregation_scheme
        self.readout_variable = readout_variable

        # ``None`` stands for "no updates"; a new list is created per
        # instance so that aggregators never share a default list.
        self.initialization_updates = (
            [] if initialization_updates is None
            else initialization_updates)
        self.accumulation_updates = (
            [] if accumulation_updates is None
            else accumulation_updates)


class Mean(AggregationScheme):
    """Aggregation scheme which computes the mean.

    The numerator and the denominator are accumulated separately over
    batches; the readout is the ratio of the two accumulated values.

    Parameters
    ----------
    numerator : :class:`~tensor.TensorVariable`
        Theano variable for the numerator e.g. the likelihood
    denominator : :class:`~tensor.TensorVariable`
        Theano variable for the denominator e.g. the batch size

    """
    def __init__(self, numerator, denominator):
        # This scheme is described by a numerator/denominator pair rather
        # than by a single per-batch variable, so the base-class
        # ``__init__`` (which expects one ``variable``) is not invoked
        # and instances do not get a ``variable`` attribute.
        self.numerator = numerator
        self.denominator = denominator

    def get_aggregator(self):
        """Return a new Aggregator accumulating numerator and denominator.

        Returns
        -------
        aggregator : :class:`Aggregator`
            Its readout variable is the accumulated numerator divided
            by the accumulated denominator.

        """
        # Flag that is reset to zero by the initialization updates and
        # set to one by every accumulation step.
        initialized = shared_like(0.)
        numerator_acc = shared_like(self.numerator)
        denominator_acc = shared_like(self.denominator)

        # Dummy default expression to use as the previously-aggregated
        # value, that has the same shape as the new result
        numerator_zeros = tensor.as_tensor(self.numerator).zeros_like()
        denominator_zeros = tensor.as_tensor(self.denominator).zeros_like()

        # On the first batch after (re-)initialization the accumulators
        # are replaced by the zero placeholders above; afterwards the new
        # batch value is added to what has been accumulated so far.
        conditional_update_num = self.numerator + ifelse(initialized,
                                                         numerator_acc,
                                                         numerator_zeros)
        conditional_update_den = self.denominator + ifelse(initialized,
                                                           denominator_acc,
                                                           denominator_zeros)

        initialization_updates = [(numerator_acc,
                                   tensor.zeros_like(numerator_acc)),
                                  (denominator_acc,
                                   tensor.zeros_like(denominator_acc)),
                                  (initialized,
                                   tensor.zeros_like(initialized))]
        accumulation_updates = [(numerator_acc,
                                 conditional_update_num),
                                (denominator_acc,
                                 conditional_update_den),
                                (initialized, tensor.ones_like(initialized))]
        aggregator = Aggregator(aggregation_scheme=self,
                                initialization_updates=initialization_updates,
                                accumulation_updates=accumulation_updates,
                                readout_variable=(numerator_acc /
                                                  denominator_acc))
        return aggregator


class Perplexity(Mean):
    """Aggregation scheme whose readout is ``exp(-mean)``.

    Accumulation is inherited unchanged from :class:`Mean`; only the
    readout variable of the resulting aggregator is transformed.

    """
    def get_aggregator(self):
        """Return the :class:`Mean` aggregator with an exponentiated readout."""
        mean_aggregator = super(Perplexity, self).get_aggregator()
        mean_readout = mean_aggregator.readout_variable
        mean_aggregator.readout_variable = tensor.exp(-mean_readout)
        return mean_aggregator


def mean(numerator, denominator=1.):
    """Mean of quantity (numerator) over a number (denominator) values.

    Builds ``numerator / denominator``, gives it the name of
    `numerator` and tags it with a :class:`Mean` aggregation scheme
    constructed from the same two arguments.

    """
    ratio = numerator / denominator
    ratio.name = numerator.name
    ratio.tag.aggregation_scheme = Mean(numerator, denominator)
    return ratio


def perplexity(log_likelihood, n_examples):
    """Perplexity for total log_likelihood of n_examples.

    Builds ``exp(-log_likelihood / n_examples)``, names it
    ``'perplexity'`` and tags it with a :class:`Perplexity` aggregation
    scheme constructed from the same two arguments.

    """
    exponent = -log_likelihood / n_examples
    result = tensor.exp(exponent)
    result.name = 'perplexity'
    result.tag.aggregation_scheme = Perplexity(log_likelihood, n_examples)
    return result


class _DataIndependent(AggregationScheme):
    """Dummy aggregation scheme for values that don't depend on data."""
    def get_aggregator(self):
        """Return an Aggregator without updates that reads the variable."""
        # Nothing has to be initialized or accumulated: the variable
        # itself serves as the readout.
        settings = dict(aggregation_scheme=self,
                        initialization_updates=[],
                        accumulation_updates=[],
                        readout_variable=self.variable)
        return Aggregator(**settings)


class TakeLast(AggregationScheme):
    """Aggregation scheme which remembers only the last value."""
    def get_aggregator(self):
        """Return an Aggregator that overwrites its storage on each batch."""
        storage = shared_like(self.variable)
        self.storage = storage
        # Initialization zeroes the storage; every accumulation step
        # replaces it with the value computed on the current batch.
        reset_updates = [(storage, tensor.zeros_like(storage))]
        overwrite_updates = [(storage, self.variable)]
        return Aggregator(aggregation_scheme=self,
                          initialization_updates=reset_updates,
                          accumulation_updates=overwrite_updates,
                          readout_variable=storage)


def _simple_aggregation(scheme, variable):
    variable = variable.copy(variable.name)
    variable.tag.aggregation_scheme = scheme(variable)
    return variable


# Public helper: ``take_last(variable)`` calls ``_simple_aggregation``
# with the :class:`TakeLast` scheme.
take_last = partial(_simple_aggregation, TakeLast)



class Minimum(AggregationScheme):
    """Aggregation scheme which remembers only the minimum value."""
    def _build_aggregator(self, accumulate_update):
        """Return an Aggregator that folds batches into ``self.storage``.

        ``self.storage`` must have been set before this is called.

        Parameters
        ----------
        accumulate_update : Theano expression
            The new storage value to use once at least one batch has
            been accumulated. On the first batch after initialization
            the value of ``self.variable`` is stored instead.

        """
        # Flag that is zero after initialization and one after the first
        # accumulation step.
        seen_batch = shared_like(0.)
        next_value = ifelse(seen_batch, accumulate_update, self.variable)
        reset_updates = [
            (self.storage, tensor.zeros_like(self.storage)),
            (seen_batch, tensor.zeros_like(seen_batch)),
        ]
        step_updates = [
            (self.storage, next_value),
            (seen_batch, tensor.ones_like(seen_batch)),
        ]
        return Aggregator(aggregation_scheme=self,
                          initialization_updates=reset_updates,
                          accumulation_updates=step_updates,
                          readout_variable=self.storage)

    def get_aggregator(self):
        """Return an Aggregator keeping the element-wise minimum."""
        self.storage = shared_like(self.variable)
        running_minimum = tensor.minimum(self.storage, self.variable)
        return self._build_aggregator(running_minimum)

# Public helper: ``minimum(variable)`` calls ``_simple_aggregation``
# with the :class:`Minimum` scheme.
minimum = partial(_simple_aggregation, Minimum)



class Maximum(Minimum):
    """Aggregation scheme which remembers only the maximum value."""
    def get_aggregator(self):
        """Return an Aggregator keeping the element-wise maximum."""
        self.storage = shared_like(self.variable)
        running_maximum = tensor.maximum(self.storage, self.variable)
        return self._build_aggregator(running_maximum)

# Public helper: ``maximum(variable)`` calls ``_simple_aggregation``
# with the :class:`Maximum` scheme.
maximum = partial(_simple_aggregation, Maximum)



class Concatenate(Minimum):
    """Aggregation scheme which remembers values from all batches.

    Parameters
    ----------
    variable: :class:`~tensor.TensorVariable`
        The variable that holds the desired value on a single batch.

    """
    def __init__(self, variable):
        # Prepend an axis to concatenate along. It has to be
        # non-broadcastable for concatenate to always work.
        padded = tensor.shape_padleft(variable, 1)
        unbroadcasted = tensor.unbroadcast(padded, 0)
        renamed = unbroadcasted.copy(variable.name)
        super(Concatenate, self).__init__(renamed)

    def get_aggregator(self):
        """Return an Aggregator appending each batch value to the storage."""
        self.storage = shared_like(self.variable)
        joined = tensor.concatenate([self.storage, self.variable])
        return self._build_aggregator(joined)

# Public helper: ``concatenate(variable)`` calls ``_simple_aggregation``
# with the :class:`Concatenate` scheme.
concatenate = partial(_simple_aggregation, Concatenate)



@add_metaclass(ABCMeta)
class MonitoredQuantity(object):
    """The base class for monitored-quantities.

    Implement this interface to monitor a non-Theano quantity in
    Blocks. :meth:`initialize` sets up the accumulators and the
    parameters needed to compute the quantity, :meth:`aggregate`
    processes the results of every batch, and finally
    :meth:`get_aggregated_value` is called to get the aggregated
    results.

    Attributes
    ----------
    requires : list
        List of Theano variables needed to calculate this quantity.
    name : str
        The name of monitored quantity which appears in the log.

    See Also
    --------
    :class:`~blocks.monitoring.evaluators.DatasetEvaluator`
    :class:`~blocks.extensions.DataStreamMonitoring`

    """
    def __init__(self, requires=None, name=None):
        # ``None`` stands for "no requirements"; a new list is created
        # per instance.
        self.requires = [] if requires is None else requires
        self.name = name

    @abstractmethod
    def initialize(self):
        """Initialize accumulators for this monitored quantity."""

    @abstractmethod
    def aggregate(self, *args):
        r"""Aggregate results for every batch.

        \*args : list of :class:`~numpy.ndarray`
            The values of the variables required to aggregate the
            value of the quantity.

        """

    @abstractmethod
    def get_aggregated_value(self):
        """Obtain the result of aggregation."""


1			"""Evaluate Theano variables on auxiliary data and during training."""
2			from functools import partial
3			import logging
4			from abc import ABCMeta, abstractmethod
5
6			from six import add_metaclass
7			from theano import tensor
8			from theano.ifelse import ifelse
9
10			from blocks.utils import shared_like
11
12			logger = logging.getLogger(__name__)
13
14
15			@add_metaclass(ABCMeta)
16			class AggregationScheme(object):
17			"""How to incrementally evaluate a Theano variable over minibatches.
18
19			An AggregationScheme allocates :class:`Aggregator` that can
20			incrementally compute the value of a Theano variable on a full dataset
21			by aggregating partial results computed on multiple batches.
22
23			The AggregationScheme should be attached via the tag
24			``aggregation_scheme`` to a Theano variable which computes the desired
25			value on a single batch.
26
27			Parameters
28			----------
29			variable: :class:`~tensor.TensorVariable`
30			The variable that holds the desired value on a single batch.
31
32			"""
33			def __init__(self, variable):
34			self.variable = variable
35
36			@abstractmethod
37			def get_aggregator(self):
38			"""Return a new Aggregator for this variable."""
39			pass
40
41
42			class Aggregator(object):
43			"""An Aggregator incrementally evaluates a Theano variable on a dataset.
44
45			.. warning::
46			The Aggregators should never be created directly. Instead use the
47			:meth:`AggregationScheme.get_aggregator` method.
48
49			Example usages are:
50
51			* compute the mean of some value over examples, sequence lengths etc.
52			* track a parameter of a model
53			* monitor a penalty
54
55			The Aggregator maintains a set of Theano shared variables called
56			accumulators and specifies how they should be initialized, and
57			updated with incremental calculations. Finally, it
58			provides a Theano variable that reads the accumulators
59			and computes the final value.
60
61			Parameters
62			----------
63			aggregation_scheme : :class:`AggregationScheme`
64			The aggregation scheme that constructed this Aggregator
65			initialization_updates : list of Theano updates
66			Updates that specify how to initialize shared variables of
67			this Aggregator. *Can only refer to shared variables and
68			constants.*
69			accumulation_updates : list of Theano updates
70			Updates that specify how a new batch of data gets processed
71			by this Aggregator. Can refer to model inputs.
72			readout_variable : :class:`~tensor.TensorVariable`
73			Theano variable that holds the final value based on aggregated
74			partial results. *readout_variable must only consist of shared
75			variables and constants.*
76
77			Attributes
78			----------
79			All constructor parameters are accessible as attributes.
80
81			"""
82			def __init__(self, aggregation_scheme, initialization_updates=None,
83			accumulation_updates=None, readout_variable=None):
84			self.aggregation_scheme = aggregation_scheme
85			self.readout_variable = readout_variable
86
87			if initialization_updates is None:
88			initialization_updates = []
89			if accumulation_updates is None:
90			accumulation_updates = []
91			self.initialization_updates = initialization_updates
92			self.accumulation_updates = accumulation_updates
93
94
95			class Mean(AggregationScheme):
96			"""Aggregation scheme which computes the mean.
97
98			Parameters
99			----------
100			numerator : :class:`~tensor.TensorVariable`
101			Theano variable for the numerator e.g. the likelihood
102			denominator : :class:`~tensor.TensorVariable`
103			Theano variable for the denominator e.g. the batch size
104
105			"""
106			def __init__(self, numerator, denominator):
			0 ignored issues – show Bug introduced 2016-10-29 17:34 UTC by Report Bug Copy Issue Report Show Similar Issues like this The `__init__` method of the super-class `AggregationScheme` is not called. It is generally advisable to initialize the super-class by calling its `__init__` method: class SomeParent: def __init__(self): self.x = 1 class SomeChild(SomeParent): def __init__(self): # Initialize the super class SomeParent.__init__(self) Loading history...
107			self.numerator = numerator
108			self.denominator = denominator
109
110			def get_aggregator(self):
111			initialized = shared_like(0.)
112			numerator_acc = shared_like(self.numerator)
113			denominator_acc = shared_like(self.denominator)
114
115			# Dummy default expression to use as the previously-aggregated
116			# value, that has the same shape as the new result
117			numerator_zeros = tensor.as_tensor(self.numerator).zeros_like()
118			denominator_zeros = tensor.as_tensor(self.denominator).zeros_like()
119
120			conditional_update_num = self.numerator + ifelse(initialized,
121			numerator_acc,
122			numerator_zeros)
123			conditional_update_den = self.denominator + ifelse(initialized,
124			denominator_acc,
125			denominator_zeros)
126
127			initialization_updates = [(numerator_acc,
128			tensor.zeros_like(numerator_acc)),
129			(denominator_acc,
130			tensor.zeros_like(denominator_acc)),
131			(initialized,
132			tensor.zeros_like(initialized))]
133			accumulation_updates = [(numerator_acc,
134			conditional_update_num),
135			(denominator_acc,
136			conditional_update_den),
137			(initialized, tensor.ones_like(initialized))]
138			aggregator = Aggregator(aggregation_scheme=self,
139			initialization_updates=initialization_updates,
140			accumulation_updates=accumulation_updates,
141			readout_variable=(numerator_acc /
142			denominator_acc))
143			return aggregator
144
145
146			class Perplexity(Mean):
147
148			def get_aggregator(self):
149			aggregator = super(Perplexity, self).get_aggregator()
150			aggregator.readout_variable = tensor.exp(-aggregator.readout_variable)
151			return aggregator
152
153
154			def mean(numerator, denominator=1.):
155			"""Mean of quantity (numerator) over a number (denominator) values."""
156			variable = numerator / denominator
157			variable.tag.aggregation_scheme = Mean(numerator, denominator)
158			variable.name = numerator.name
159			return variable
160
161
162			def perplexity(log_likelihood, n_examples):
163			"""Perplexity for total log_likelihood of n_examples."""
164			variable = tensor.exp(-log_likelihood / n_examples)
165			variable.tag.aggregation_scheme = Perplexity(log_likelihood, n_examples)
166			variable.name = 'perplexity'
167			return variable
168
169
170			class _DataIndependent(AggregationScheme):
171			"""Dummy aggregation scheme for values that don't depend on data."""
172			def get_aggregator(self):
173			return Aggregator(aggregation_scheme=self,
174			initialization_updates=[],
175			accumulation_updates=[],
176			readout_variable=self.variable)
177
178
179			class TakeLast(AggregationScheme):
180			"""Aggregation scheme which remembers only the last value."""
181			def get_aggregator(self):
182			self.storage = shared_like(self.variable)
183			return Aggregator(aggregation_scheme=self,
184			initialization_updates=[
185			(self.storage, tensor.zeros_like(self.storage))],
186			accumulation_updates=[(self.storage, self.variable)],
187			readout_variable=self.storage)
188
189
190			def _simple_aggregation(scheme, variable):
191			variable = variable.copy(variable.name)
192			variable.tag.aggregation_scheme = scheme(variable)
193			return variable
194
195
196			take_last = partial(_simple_aggregation, TakeLast)
			0 ignored issues – show Coding Style Naming introduced 2016-10-29 17:34 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `take_last` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
197
198
199			class Minimum(AggregationScheme):
200			"""Aggregation scheme which remembers only the minimum value."""
201			def _build_aggregator(self, accumulate_update):
202			initialized = shared_like(0.)
203			accumulate = ifelse(initialized, accumulate_update, self.variable)
204			return Aggregator(aggregation_scheme=self,
205			initialization_updates=[
206			(self.storage, tensor.zeros_like(self.storage)),
207			(initialized, tensor.zeros_like(initialized))
208			],
209			accumulation_updates=[
210			(self.storage, accumulate),
211			(initialized, tensor.ones_like(initialized))
212			],
213			readout_variable=self.storage)
214
215			def get_aggregator(self):
216			self.storage = shared_like(self.variable)
217			return self._build_aggregator(tensor.minimum(self.storage,
218			self.variable))
219
220			minimum = partial(_simple_aggregation, Minimum)
			0 ignored issues – show Coding Style Naming introduced 2016-10-29 17:34 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `minimum` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
221
222
223			class Maximum(Minimum):
224			"""Aggregation scheme which remembers only the maximum value."""
225			def get_aggregator(self):
226			self.storage = shared_like(self.variable)
227			return self._build_aggregator(tensor.maximum(self.storage,
228			self.variable))
229
230			maximum = partial(_simple_aggregation, Maximum)
			0 ignored issues – show Coding Style Naming introduced 2016-10-29 17:34 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `maximum` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
231
232
233			class Concatenate(Minimum):
234			"""Aggregation scheme which remembers values from all batches.
235
236			Parameters
237			----------
238			variable: :class:`~tensor.TensorVariable`
239			The variable that holds the desired value on a single batch.
240
241			"""
242			def __init__(self, variable):
243			# Add an extra axis to concatenate along. Must be non-broadcastable
244			# for concatenate to always work.
245			variable = (tensor.unbroadcast(tensor.shape_padleft(variable, 1), 0)
246			.copy(variable.name))
247			super(Concatenate, self).__init__(variable)
248
249			def get_aggregator(self):
250			self.storage = shared_like(self.variable)
251			return self._build_aggregator(tensor.concatenate([self.storage,
252			self.variable]))
253
254			concatenate = partial(_simple_aggregation, Concatenate)
			0 ignored issues – show Coding Style Naming introduced 2016-10-29 21:18 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `concatenate` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
255
256
257			@add_metaclass(ABCMeta)
			0 ignored issues – show Unused Code introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this This abstract class does not seem to be used anywhere. Loading history...
258			class MonitoredQuantity(object):
259			"""The base class for monitored-quantities.
260
261			To monitor a non-Theano quantity in Blocks you have to implement this
262			interface for it. The initialize method initializes accumulators and
263			the parameters needed to compute this quantity, aggregate method
264			aggregates results for every batch, and finally readout is called
265			to get the aggregated results.
266
267			Attributes
268			----------
269			requires : list
270			List of Theano variables needed to calculate this quantity.
271			name : str
272			The name of monitored quantity which appears in the log.
273
274			See Also
275			--------
276			:class:`~blocks.monitoring.evaluators.DatasetEvaluator`
277			:class:`~blocks.extensions.DataStreamMonitoring`
278
279			"""
280			def __init__(self, requires=None, name=None):
281			if requires is None:
282			requires = []
283			self.requires = requires
284			self.name = name
285
286			@abstractmethod
287			def initialize(self):
288			"""Initialize accumulators for this monitored quantity."""
289			pass
290
291			@abstractmethod
292			def aggregate(self, *args):
293			r"""Aggregate results for every batch.
294
295			\*args : list of :class:`~numpy.ndarray`
296			The values of the variables required to aggregate the
297			value of the quantity.
298
299			"""
300			pass
301
302			@abstractmethod
303			def get_aggregated_value(self):
304			"""Obtain the result of aggregation."""
305			pass
306

mila-udem / blocks

Issues (119)

blocks/monitoring/aggregation.py (6 issues)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like