Code Duplication    Length = 41-46 lines in 2 locations

blocks/algorithms/__init__.py 2 locations

@@ 522-567 (lines=46) @@
519      See Also
520      --------
521      :class:`SharedVariableModifier`
522
523      """
524      def __init__(self, learning_rate=1.0, momentum=0.):
525          scale = Scale(learning_rate=learning_rate)
526          basic_momentum = BasicMomentum(momentum=momentum)
527          self.learning_rate = scale.learning_rate
528          self.momentum = basic_momentum.momentum
529          self.components = [scale, basic_momentum]
530
531
532  class AdaDelta(StepRule):
533      """Adapts the step size over time using only first order information.
534
535      Parameters
536      ----------
537      decay_rate : float, optional
538          Decay rate in [0, 1]. Defaults to 0.95.
539      epsilon : float, optional
540          Stabilizing constant for RMS. Defaults to 1e-6.
541
542      Notes
543      -----
544      For more information, see [ADADELTA]_.
545
546      .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
547         Rate Method*, arXiv:1212.5701.
548
549      """
550      def __init__(self, decay_rate=0.95, epsilon=1e-6):
551          if not 0.0 <= decay_rate <= 1.0:
552              raise ValueError("decay rate needs to be in [0, 1]")
553          self.decay_rate = shared_floatx(decay_rate, "decay_rate")
554          add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
555          self.epsilon = shared_floatx(epsilon, "epsilon")
556          add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)
557
558      def compute_step(self, parameter, previous_step):
559          mean_square_step_tm1 = _create_algorithm_buffer_for(
560              parameter, "mean_square_step_tm1")
561          mean_square_delta_x_tm1 = _create_algorithm_buffer_for(
562              parameter, "mean_square_delta_x_tm1")
563
564          mean_square_step_t = (
565              self.decay_rate * mean_square_step_tm1 +
566              (1 - self.decay_rate) * tensor.sqr(previous_step)
567          )
568
569          rms_delta_x_tm1 = tensor.sqrt(mean_square_delta_x_tm1 + self.epsilon)
570          rms_step_t = tensor.sqrt(mean_square_step_t + self.epsilon)
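
The first duplicated block covers the tail of the composite ``Momentum`` rule and most of ``AdaDelta``. For readers without the surrounding source, here is a minimal NumPy sketch of the per-parameter ADADELTA update that the quoted ``compute_step`` begins. The hunk is truncated at line 570, so the tail of the update (rescaling the step and refreshing the second buffer) is filled in from [ADADELTA]_ rather than from the report; ``adadelta_step`` and its ``state`` dict are hypothetical names standing in for the algorithm buffers above, not the Blocks API:

    import numpy as np

    def adadelta_step(previous_step, state, decay_rate=0.95, epsilon=1e-6):
        # One ADADELTA update for a single parameter. `state` plays the
        # role of the two algorithm buffers created above.
        mean_square_step_t = (
            decay_rate * state["mean_square_step_tm1"]
            + (1 - decay_rate) * np.square(previous_step))
        rms_delta_x_tm1 = np.sqrt(state["mean_square_delta_x_tm1"] + epsilon)
        rms_step_t = np.sqrt(mean_square_step_t + epsilon)
        # Rescale the incoming step by the ratio of the two RMS values
        # (this part is reconstructed from Zeiler's paper, not the hunk).
        delta_x_t = rms_delta_x_tm1 / rms_step_t * previous_step
        # Refresh both running averages for the next iteration.
        state["mean_square_step_tm1"] = mean_square_step_t
        state["mean_square_delta_x_tm1"] = (
            decay_rate * state["mean_square_delta_x_tm1"]
            + (1 - decay_rate) * np.square(delta_x_t))
        return delta_x_t  # the caller applies parameter -= delta_x_t

    state = {"mean_square_step_tm1": 0.0, "mean_square_delta_x_tm1": 0.0}
    step = adadelta_step(np.array([0.5, -1.0]), state)

With both buffers starting at zero, the first update has magnitude on the order of ``sqrt(epsilon)``, which is roughly why ADADELTA gets by without a hand-tuned global learning rate.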
@@ 725-765 (lines=41) @@
722      threshold : float
723          Maximum norm for a given (portion of a) tensor.
724      axis : int or iterable, optional
725          An integer single axis, or an iterable collection of integer
726          axes over which to sum in order to calculate the L2 norm. If
727          `None` (the default), the norm is computed over all elements
728          of the tensor.
729
730      Notes
731      -----
732      Because of the way the :class:`StepRule` API works, this particular
733      rule implements norm clipping of the value *after* update in the
734      following way: it computes ``parameter - previous_step``, scales it
735      to have (possibly axes-wise) norm(s) of at most `threshold`,
736      then subtracts *that* value from `parameter` to yield an 'equivalent
737      step' that respects the desired norm constraints. This procedure
738      implicitly assumes one is doing simple (stochastic) gradient descent,
739      and so steps computed by this step rule may not make sense for use
740      in other contexts.
741
742      Investigations into max-norm regularization date from [Srebro2005]_.
743      The first appearance of this technique as a regularization method
744      for the weight vectors of individual hidden units in feed-forward
745      neural networks may be [Hinton2012]_.
746
747      .. [Srebro2005] Nathan Srebro and Adi Shraibman.
748         "Rank, Trace-Norm and Max-Norm". *18th Annual Conference
749         on Learning Theory (COLT)*, June 2005.
750
751      .. [Hinton2012] Geoffrey E. Hinton, Nitish Srivastava,
752         Alex Krizhevsky, Ilya Sutskever, Ruslan R. Salakhutdinov.
753         "Improving neural networks by preventing co-adaptation of
754         feature detectors". arXiv:1207.0580.
755
756      """
757      def __init__(self, threshold, axis=None):
758          axis = pack(axis) if axis is not None else ()
759          self.axis = set(axis)
760          self.threshold = shared_floatx(threshold, "threshold")
761          add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
762          if len(axis) != len(self.axis):
763              raise ValueError("axis must be unique")
764
765      def compute_step(self, parameter, previous_step):
766          if any(ax >= previous_step.ndim for ax in self.axis):
767              raise ValueError("Invalid axis {} for {}, ndim={}".format(
768                  self.axis, parameter, previous_step.ndim))
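
The second duplicated block belongs to the max-norm clipping rule whose Notes are quoted above (the hunk ends before the class name appears). A minimal NumPy sketch of the procedure those Notes describe, clip the *updated* value and then recover an 'equivalent step', might look as follows; ``clipped_step`` is a hypothetical helper, and the small floor on the norm is an added guard against dividing by a zero norm, not something visible in the quoted code:

    import numpy as np

    def clipped_step(parameter, previous_step, threshold, axis=None):
        # Clip the norm of the *updated* value, then back out the step
        # that produces it (the 'equivalent step' from the Notes).
        updated = parameter - previous_step
        sum_axes = tuple(axis) if axis is not None else None
        norms = np.sqrt(np.sum(np.square(updated), axis=sum_axes,
                               keepdims=True))
        # Shrink only where the norm exceeds the threshold.
        scale = np.minimum(1.0, threshold / np.maximum(norms, 1e-12))
        return parameter - updated * scale

    # Example: clip each row of a weight matrix to L2 norm at most 1.
    w = np.array([[3.0, 4.0], [0.3, 0.4]])
    step = clipped_step(w, np.zeros_like(w), threshold=1.0, axis=(1,))
    print(w - step)   # [[0.6, 0.8], [0.3, 0.4]]: row norms are now <= 1

When ``previous_step`` is a plain SGD step, ``parameter - previous_step`` is exactly the post-update value, which is why the Notes warn that steps produced by this rule may not make sense outside simple (stochastic) gradient descent.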