Code Duplication    Length = 41-46 lines in 2 locations

blocks/algorithms/__init__.py (2 locations)

@@ 522-567 (lines=46) @@
    :class:`SharedVariableModifier`

    """
    def __init__(self, learning_rate=1.0, momentum=0.):
        scale = Scale(learning_rate=learning_rate)
        basic_momentum = BasicMomentum(momentum=momentum)
        self.learning_rate = scale.learning_rate
        self.momentum = basic_momentum.momentum
        self.components = [scale, basic_momentum]


class AdaDelta(StepRule):
    """Adapts the step size over time using only first order information.

    Parameters
    ----------
    decay_rate : float, optional
        Decay rate in [0, 1]. Defaults to 0.95.
    epsilon : float, optional
        Stabilizing constant for RMS. Defaults to 1e-6.

    Notes
    -----
    For more information, see [ADADELTA]_.

    .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
       Rate Method*, arXiv:1212.5701.

    """
    def __init__(self, decay_rate=0.95, epsilon=1e-6):
        if not 0.0 <= decay_rate <= 1.0:
            raise ValueError("decay rate needs to be in [0, 1]")
        self.decay_rate = shared_floatx(decay_rate, "decay_rate")
        add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
        self.epsilon = shared_floatx(epsilon, "epsilon")
        add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)

    def compute_step(self, parameter, previous_step):
        mean_square_step_tm1 = _create_algorithm_buffer_for(
            parameter, "mean_square_step_tm1")
        mean_square_delta_x_tm1 = _create_algorithm_buffer_for(
            parameter, "mean_square_delta_x_tm1")

        mean_square_step_t = (
            self.decay_rate * mean_square_step_tm1 +
            (1 - self.decay_rate) * tensor.sqr(previous_step)
        )

        rms_delta_x_tm1 = tensor.sqrt(mean_square_delta_x_tm1 + self.epsilon)
        rms_step_t = tensor.sqrt(mean_square_step_t + self.epsilon)
        delta_x_t = rms_delta_x_tm1 / rms_step_t * previous_step
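The listing above cuts off at line 569, just before `compute_step` updates the second accumulator and returns. For comparison, a minimal NumPy sketch of the same AdaDelta recurrence ([ADADELTA]_) follows; `adadelta_step` and its explicit `state` tuple are illustrative stand-ins for the shared-variable buffers used in the snippet, not Blocks API:

import numpy as np

def adadelta_step(previous_step, state, decay_rate=0.95, epsilon=1e-6):
    """One AdaDelta update; mirrors compute_step above in plain NumPy."""
    mean_square_step, mean_square_delta_x = state
    # Decaying average of squared incoming steps (E[g^2] in the paper).
    mean_square_step = (decay_rate * mean_square_step +
                        (1 - decay_rate) * previous_step ** 2)
    # The ratio of the two RMS terms rescales the step.
    delta_x = (np.sqrt(mean_square_delta_x + epsilon) /
               np.sqrt(mean_square_step + epsilon)) * previous_step
    # Decaying average of squared outgoing steps (E[dx^2] in the paper);
    # this update falls past the end of the region shown above.
    mean_square_delta_x = (decay_rate * mean_square_delta_x +
                           (1 - decay_rate) * delta_x ** 2)
    return delta_x, (mean_square_step, mean_square_delta_x)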
@@ 725-765 (lines=41) @@
    axis : int or iterable, optional
        An integer single axis, or an iterable collection of integer
        axes over which to sum in order to calculate the L2 norm. If
        `None` (the default), the norm is computed over all elements
        of the tensor.

    Notes
    -----
    Because of the way the :class:`StepRule` API works, this particular
    rule implements norm clipping of the value *after* update in the
    following way: it computes ``parameter - previous_step``, scales it
    to have (possibly axes-wise) norm(s) of at most `threshold`,
    then subtracts *that* value from `parameter` to yield an 'equivalent
    step' that respects the desired norm constraints. This procedure
    implicitly assumes one is doing simple (stochastic) gradient descent,
    and so steps computed by this step rule may not make sense for use
    in other contexts.

    Investigations into max-norm regularization date from [Srebro2005]_.
    The first appearance of this technique as a regularization method
    for the weight vectors of individual hidden units in feed-forward
    neural networks may be [Hinton2012]_.

    .. [Srebro2005] Nathan Srebro and Adi Shraibman.
       "Rank, Trace-Norm and Max-Norm". *18th Annual Conference
       on Learning Theory (COLT)*, June 2005.

    .. [Hinton2012] Geoffrey E. Hinton, Nitish Srivastava,
       Alex Krizhevsky, Ilya Sutskever, Ruslan R. Salakhutdinov.
       "Improving neural networks by preventing co-adaptation of
       feature detectors". arXiv:1207.0580.

    """
    def __init__(self, threshold, axis=None):
        axis = pack(axis) if axis is not None else ()
        self.axis = set(axis)
        self.threshold = shared_floatx(threshold, "threshold")
        add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
        if len(axis) != len(self.axis):
            raise ValueError("axis must be unique")

    def compute_step(self, parameter, previous_step):
        if any(ax >= previous_step.ndim for ax in self.axis):
            raise ValueError("Invalid axis {} for {}, ndim={}".format(
                self.axis, parameter, previous_step.ndim))
        if len(self.axis) == 0:
            norms = l2_norm([parameter - previous_step])
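The Notes in this second location describe the clip-after-update trick in prose; a minimal NumPy sketch of that computation may help, assuming the training loop applies ``parameter - step``. The helper name `clip_step_to_norm` and the small guard against division by zero are this sketch's own additions, not part of the rule:

import numpy as np

def clip_step_to_norm(parameter, previous_step, threshold, axis=None):
    """Clip the post-update value, then back out an equivalent step."""
    # Value the parameter would take after the unclipped update.
    updated = parameter - previous_step
    # (Possibly axes-wise) L2 norm of that updated value.
    norms = np.sqrt(np.sum(updated ** 2, axis=axis, keepdims=True))
    # Shrink only where the norm exceeds the threshold; the epsilon
    # guard against an all-zero tensor is an addition of this sketch.
    scale = np.minimum(1.0, threshold / np.maximum(norms, 1e-12))
    clipped = updated * scale
    # Subtracting this step from `parameter` lands exactly on `clipped`.
    return parameter - clipped

With `axis=None` this clips the global norm of the updated tensor; passing, e.g., `axis=0` for a weight matrix enforces a per-unit max norm in the spirit of [Hinton2012]_.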