Code Duplication    Length = 41-46 lines in 2 locations

blocks/algorithms/__init__.py 2 locations

@@ 522-567 (lines=46) @@
    """Accumulates step with exponential discount.

    Combines :class:`BasicMomentum` and :class:`Scale` to form the
    usual momentum step rule.

    Parameters
    ----------
    learning_rate : float, optional
        The learning rate by which the previous step scaled. Defaults to 1.
    momentum : float, optional
        The momentum coefficient. Defaults to 0.

    Attributes
    ----------
    learning_rate : :class:`~tensor.SharedVariable`
        A variable for learning rate.
    momentum : :class:`~tensor.SharedVariable`
        A variable for momentum.

    See Also
    --------
    :class:`SharedVariableModifier`

    """
    def __init__(self, learning_rate=1.0, momentum=0.):
        scale = Scale(learning_rate=learning_rate)
        basic_momentum = BasicMomentum(momentum=momentum)
        self.learning_rate = scale.learning_rate
        self.momentum = basic_momentum.momentum
        self.components = [scale, basic_momentum]


class AdaDelta(StepRule):
    """Adapts the step size over time using only first order information.

    Parameters
    ----------
    decay_rate : float, optional
        Decay rate in [0, 1]. Defaults to 0.95.
    epsilon : float, optional
        Stabilizing constant for RMS. Defaults to 1e-6.

    Notes
    -----
    For more information, see [ADADELTA]_.

    .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
       Rate Method*, arXiv:1212.5701.

    """
    def __init__(self, decay_rate=0.95, epsilon=1e-6):
        if not 0.0 <= decay_rate <= 1.0:
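For context, step rules like the Momentum and AdaDelta classes quoted in this first location are normally composed and handed to a GradientDescent training algorithm. A minimal usage sketch follows; the toy cost, the shared variable W, and the exact keyword names are assumptions based on the Blocks/Theano API as I read it and may differ between versions:

import numpy
import theano
from theano import tensor
from blocks.algorithms import (GradientDescent, CompositeRule,
                               StepClipping, Momentum)

# Toy quadratic cost over a single shared parameter (illustration only).
x = tensor.vector('x')
W = theano.shared(numpy.zeros(3, dtype=theano.config.floatX), name='W')
cost = ((x - W) ** 2).sum()

# Compose step rules: clip the step norm first, then apply momentum.
step_rule = CompositeRule([StepClipping(threshold=1.0),
                           Momentum(learning_rate=0.01, momentum=0.9)])
algorithm = GradientDescent(cost=cost, parameters=[W],
                            step_rule=step_rule)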
@@ 725-765 (lines=41) @@
            norm = l2_norm(previous_steps.values())
            multiplier = tensor.switch(norm < self.threshold,
                                       1, self.threshold / norm)
            steps = OrderedDict(
                (parameter, step * multiplier)
                for parameter, step in previous_steps.items())
        return steps, []


class VariableClipping(StepRule):
    """Clip the maximum norm of individual variables along certain axes.

    This :class:`StepRule` can be used to implement L2 norm constraints on
    e.g. the weight vectors of individual hidden units, convolutional
    filters or entire weight tensors. Combine with :class:`Restrict`
    (and possibly :class:`CompositeRule`), to apply such constraints only
    to certain variables and/or apply different norm constraints to
    different variables.

    Parameters
    ----------
    threshold : float
        Maximum norm for a given (portion of a) tensor.
    axis : int or iterable, optional
        An integer single axis, or an iterable collection of integer
        axes over which to sum in order to calculate the L2 norm. If
        `None` (the default), the norm is computed over all elements
        of the tensor.

    Notes
    -----
    Because of the way the :class:`StepRule` API works, this particular
    rule implements norm clipping of the value *after* update in the
    following way: it computes ``parameter - previous_step``, scales it
    to have (possibly axes-wise) norm(s) of at most `threshold`,
    then subtracts *that* value from `parameter` to yield an 'equivalent
    step' that respects the desired norm constraints. This procedure
    implicitly assumes one is doing simple (stochastic) gradient descent,
    and so steps computed by this step rule may not make sense for use
    in other contexts.

    Investigations into max-norm regularization date from [Srebro2005]_.
    The first appearance of this technique as a regularization method
    for the weight vectors of individual hidden units in feed-forward
    neural networks may be [Hinton2012]_.

    .. [Srebro2005] Nathan Srebro and Adi Shraibman.
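The 'clip the value after update' procedure described in the VariableClipping notes can be illustrated with a small self-contained sketch; the helper name clipped_step, the NumPy arrays, and the whole-tensor norm (the axis=None case) are illustrative assumptions, while the real rule operates on Theano expressions and supports per-axis norms:

import numpy

def clipped_step(parameter, previous_step, threshold):
    # Value the parameter would take after applying the raw step.
    updated = parameter - previous_step
    # L2 norm over all elements (the axis=None case described above).
    norm = numpy.sqrt((updated ** 2).sum())
    if norm > threshold:
        # Rescale the updated value so its norm equals the threshold.
        updated = updated * (threshold / norm)
    # Equivalent step: subtracting it from the parameter yields the
    # clipped value, so the norm constraint holds after the update.
    return parameter - updated

parameter = numpy.array([3.0, 4.0])
previous_step = numpy.array([-1.0, -2.0])
print(clipped_step(parameter, previous_step, threshold=2.0))

Applying the returned step leaves the parameter with L2 norm at most 2.0 in this example.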