@@ 522-567 (lines=46) @@

    Combines :class:`BasicMomentum` and :class:`Scale` to form the
    usual momentum step rule.

    Parameters
    ----------
    learning_rate : float, optional
        The learning rate by which the previous step is scaled. Defaults to 1.
    momentum : float, optional
        The momentum coefficient. Defaults to 0.

    Attributes
    ----------
    learning_rate : :class:`~tensor.SharedVariable`
        A variable for the learning rate.
    momentum : :class:`~tensor.SharedVariable`
        A variable for the momentum.

    See Also
    --------
    :class:`SharedVariableModifier`

    """
    def __init__(self, learning_rate=1.0, momentum=0.):
        scale = Scale(learning_rate=learning_rate)
        basic_momentum = BasicMomentum(momentum=momentum)
        self.learning_rate = scale.learning_rate
        self.momentum = basic_momentum.momentum
        self.components = [scale, basic_momentum]

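
# A minimal sketch of the update this composite rule yields, assuming the
# components are applied in the listed order (Scale, then BasicMomentum).
# Plain Python pseudocode rather than the Theano graph the class builds;
# the names below are hypothetical and for illustration only.
def _momentum_step_sketch(gradient, velocity, learning_rate=1.0, momentum=0.):
    # Scale: multiply the raw gradient by the learning rate.
    scaled = learning_rate * gradient
    # BasicMomentum: fold the scaled gradient into the velocity and use the
    # velocity itself as the step, which is then subtracted from the parameter.
    new_velocity = momentum * velocity + scaled
    return new_velocity, new_velocity  # (step, updated velocity)
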
class AdaDelta(StepRule):
    """Adapts the step size over time using only first order information.

    Parameters
    ----------
    decay_rate : float, optional
        Decay rate in [0, 1]. Defaults to 0.95.
    epsilon : float, optional
        Stabilizing constant for RMS. Defaults to 1e-6.

    Notes
    -----
    For more information, see [ADADELTA]_.

    .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
       Rate Method*, arXiv:1212.5701.

    """
    def __init__(self, decay_rate=0.95, epsilon=1e-6):
        if not 0.0 <= decay_rate <= 1.0:
            raise ValueError("decay rate needs to be in [0, 1]")

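
# A rough NumPy sketch of the ADADELTA update described in Zeiler's paper
# (arXiv:1212.5701), shown for orientation only; the class builds the
# equivalent Theano updates, and the names below are hypothetical.
import numpy

def _adadelta_step_sketch(gradient, mean_square_grad, mean_square_step,
                          decay_rate=0.95, epsilon=1e-6):
    # Decaying average of squared gradients.
    mean_square_grad = (decay_rate * mean_square_grad
                        + (1 - decay_rate) * gradient ** 2)
    # Rescale the gradient by the ratio of the two RMS estimates; the
    # returned step is meant to be subtracted from the parameter.
    step = (numpy.sqrt(mean_square_step + epsilon)
            / numpy.sqrt(mean_square_grad + epsilon)) * gradient
    # Decaying average of squared steps, used for the next update.
    mean_square_step = (decay_rate * mean_square_step
                        + (1 - decay_rate) * step ** 2)
    return step, mean_square_grad, mean_square_step
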
@@ 725-765 (lines=41) @@

        multiplier = tensor.switch(norm < self.threshold,
                                   1, self.threshold / norm)
        steps = OrderedDict(
            (parameter, step * multiplier)
            for parameter, step in previous_steps.items())
        return steps, []

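
# For intuition, a scalar walk-through of the rescaling above (hypothetical
# numbers, not taken from the module): with a threshold of 5 and a combined
# step norm of 10, the multiplier is 5 / 10 = 0.5, so every step is halved
# and the rescaled joint norm equals the threshold; a norm already below the
# threshold gives a multiplier of 1 and leaves the steps unchanged.
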
class VariableClipping(StepRule):
    """Clip the maximum norm of individual variables along certain axes.

    This :class:`StepRule` can be used to implement L2 norm constraints on,
    e.g., the weight vectors of individual hidden units, convolutional
    filters or entire weight tensors. Combine with :class:`Restrict`
    (and possibly :class:`CompositeRule`) to apply such constraints only
    to certain variables and/or apply different norm constraints to
    different variables.

    Parameters
    ----------
    threshold : float
        Maximum norm for a given (portion of a) tensor.
    axis : int or iterable, optional
        A single integer axis, or an iterable collection of integer
        axes over which to sum in order to calculate the L2 norm. If
        `None` (the default), the norm is computed over all elements
        of the tensor.

    Notes
    -----
    Because of the way the :class:`StepRule` API works, this particular
    rule implements norm clipping of the value *after* the update in the
    following way: it computes ``parameter - previous_step``, scales it
    to have (possibly axes-wise) norm(s) of at most `threshold`, then
    subtracts *that* value from `parameter` to yield an 'equivalent step'
    that respects the desired norm constraints. This procedure implicitly
    assumes one is doing simple (stochastic) gradient descent, and so
    steps computed by this step rule may not make sense for use in other
    contexts.

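    As a rough sketch of that procedure for a single tensor with
    ``axis=None`` (illustrative pseudocode; the names below are
    hypothetical, and the class itself builds the corresponding Theano
    expressions)::

        updated = parameter - previous_step
        norm = ((updated ** 2).sum()) ** 0.5
        if norm > threshold:
            updated *= threshold / norm
        equivalent_step = parameter - updated
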
    Investigations into max-norm regularization date from [Srebro2005]_.
    The first appearance of this technique as a regularization method
    for the weight vectors of individual hidden units in feed-forward
    neural networks may be [Hinton2012]_.

    .. [Srebro2005] Nathan Srebro and Adi Shraibman.
       "Rank, Trace-Norm and Max-Norm". *18th Annual Conference