Code Duplication    Length = 41-46 lines in 2 locations

blocks/algorithms/__init__.py 2 locations

@@ 522-567 (lines=46) @@
519      See Also
520      --------
521      :class:`SharedVariableModifier`
522
523      """
524      def __init__(self, learning_rate=1.0, momentum=0.):
525          scale = Scale(learning_rate=learning_rate)
526          basic_momentum = BasicMomentum(momentum=momentum)
527          self.learning_rate = scale.learning_rate
528          self.momentum = basic_momentum.momentum
529          self.components = [scale, basic_momentum]
530
531
532  class AdaDelta(StepRule):
533      """Adapts the step size over time using only first order information.
534
535      Parameters
536      ----------
537      decay_rate : float, optional
538          Decay rate in [0, 1]. Defaults to 0.95.
539      epsilon : float, optional
540          Stabilizing constant for RMS. Defaults to 1e-6.
541
542      Notes
543      -----
544      For more information, see [ADADELTA]_.
545
546      .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
547         Rate Method*, arXiv:1212.5701.
548
549      """
550      def __init__(self, decay_rate=0.95, epsilon=1e-6):
551          if not 0.0 <= decay_rate <= 1.0:
552              raise ValueError("decay rate needs to be in [0, 1]")
553          self.decay_rate = shared_floatx(decay_rate, "decay_rate")
554          add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
555          self.epsilon = shared_floatx(epsilon, "epsilon")
556          add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)
557
558      def compute_step(self, parameter, previous_step):
559          mean_square_step_tm1 = _create_algorithm_buffer_for(
560              parameter, "mean_square_step_tm1")
561          mean_square_delta_x_tm1 = _create_algorithm_buffer_for(
562              parameter, "mean_square_delta_x_tm1")
563
564          mean_square_step_t = (
565              self.decay_rate * mean_square_step_tm1 +
566              (1 - self.decay_rate) * tensor.sqr(previous_step)
567          )
568
569          rms_delta_x_tm1 = tensor.sqrt(mean_square_delta_x_tm1 + self.epsilon)
570          rms_step_t = tensor.sqrt(mean_square_step_t + self.epsilon)
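
The first duplicated block covers the tail of the composite ``Momentum`` rule and most of ``AdaDelta``. For readers without the surrounding source, here is a minimal NumPy sketch of the per-parameter ADADELTA update that the quoted ``compute_step`` begins. The hunk is truncated at line 570, so the tail of the update (rescaling the step and refreshing the second buffer) is filled in from [ADADELTA]_ rather than from the report; ``adadelta_step`` and its ``state`` dict are hypothetical names standing in for the algorithm buffers above, not the Blocks API:

    import numpy as np

    def adadelta_step(previous_step, state, decay_rate=0.95, epsilon=1e-6):
        # One ADADELTA update for a single parameter. `state` plays the
        # role of the two algorithm buffers created above.
        mean_square_step_t = (
            decay_rate * state["mean_square_step_tm1"]
            + (1 - decay_rate) * np.square(previous_step))
        rms_delta_x_tm1 = np.sqrt(state["mean_square_delta_x_tm1"] + epsilon)
        rms_step_t = np.sqrt(mean_square_step_t + epsilon)
        # Rescale the incoming step by the ratio of the two RMS values
        # (this part is reconstructed from Zeiler's paper, not the hunk).
        delta_x_t = rms_delta_x_tm1 / rms_step_t * previous_step
        # Refresh both running averages for the next iteration.
        state["mean_square_step_tm1"] = mean_square_step_t
        state["mean_square_delta_x_tm1"] = (
            decay_rate * state["mean_square_delta_x_tm1"]
            + (1 - decay_rate) * np.square(delta_x_t))
        return delta_x_t  # the caller applies parameter -= delta_x_t

    state = {"mean_square_step_tm1": 0.0, "mean_square_delta_x_tm1": 0.0}
    step = adadelta_step(np.array([0.5, -1.0]), state)

With both buffers starting at zero, the first update has magnitude on the order of ``sqrt(epsilon)``, which is roughly why ADADELTA gets by without a hand-tuned global learning rate.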
@@ 725-765 (lines=41) @@
722      threshold : float
723          Maximum norm for a given (portion of a) tensor.
724      axis : int or iterable, optional
725          An integer single axis, or an iterable collection of integer
726          axes over which to sum in order to calculate the L2 norm. If
727          `None` (the default), the norm is computed over all elements
728          of the tensor.
729
730      Notes
731      -----
732      Because of the way the :class:`StepRule` API works, this particular
733      rule implements norm clipping of the value *after* update in the
734      following way: it computes ``parameter - previous_step``, scales it
735      to have (possibly axes-wise) norm(s) of at most `threshold`,
736      then subtracts *that* value from `parameter` to yield an 'equivalent
737      step' that respects the desired norm constraints. This procedure
738      implicitly assumes one is doing simple (stochastic) gradient descent,
739      and so steps computed by this step rule may not make sense for use
740      in other contexts.
741
742      Investigations into max-norm regularization date from [Srebro2005]_.
743      The first appearance of this technique as a regularization method
744      for the weight vectors of individual hidden units in feed-forward
745      neural networks may be [Hinton2012]_.
746
747      .. [Srebro2005] Nathan Srebro and Adi Shraibman.
748         "Rank, Trace-Norm and Max-Norm". *18th Annual Conference
749         on Learning Theory (COLT)*, June 2005.
750
751      .. [Hinton2012] Geoffrey E. Hinton, Nitish Srivastava,
752         Alex Krizhevsky, Ilya Sutskever, Ruslan R. Salakhutdinov.
753         "Improving neural networks by preventing co-adaptation of
754         feature detectors". arXiv:1207.0580.
755
756      """
757      def __init__(self, threshold, axis=None):
758          axis = pack(axis) if axis is not None else ()
759          self.axis = set(axis)
760          self.threshold = shared_floatx(threshold, "threshold")
761          add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
762          if len(axis) != len(self.axis):
763              raise ValueError("axis must be unique")
764
765      def compute_step(self, parameter, previous_step):
766          if any(ax >= previous_step.ndim for ax in self.axis):
767              raise ValueError("Invalid axis {} for {}, ndim={}".format(
768                  self.axis, parameter, previous_step.ndim))
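
The second duplicated block belongs to the max-norm clipping rule whose Notes are quoted above (the hunk ends before the class name appears). A minimal NumPy sketch of the procedure those Notes describe, clip the *updated* value and then recover an 'equivalent step', might look as follows; ``clipped_step`` is a hypothetical helper, and the small floor on the norm is an added guard against dividing by a zero norm, not something visible in the quoted code:

    import numpy as np

    def clipped_step(parameter, previous_step, threshold, axis=None):
        # Clip the norm of the *updated* value, then back out the step
        # that produces it (the 'equivalent step' from the Notes).
        updated = parameter - previous_step
        sum_axes = tuple(axis) if axis is not None else None
        norms = np.sqrt(np.sum(np.square(updated), axis=sum_axes,
                               keepdims=True))
        # Shrink only where the norm exceeds the threshold.
        scale = np.minimum(1.0, threshold / np.maximum(norms, 1e-12))
        return parameter - updated * scale

    # Example: clip each row of a weight matrix to L2 norm at most 1.
    w = np.array([[3.0, 4.0], [0.3, 0.4]])
    step = clipped_step(w, np.zeros_like(w), threshold=1.0, axis=(1,))
    print(w - step)   # [[0.6, 0.8], [0.3, 0.4]]: row norms are now <= 1

When ``previous_step`` is a plain SGD step, ``parameter - previous_step`` is exactly the post-update value, which is why the Notes warn that steps produced by this rule may not make sense outside simple (stochastic) gradient descent.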