Code Duplication: Length = 41-46 lines in 2 locations

blocks/algorithms/__init__.py (2 locations)

@@ 522-567 (lines=46) @@
        How fast the running average decays, value in [0, 1]
        (lower is faster).  Defaults to 0.9.
    max_scaling : float, optional
        Maximum scaling of the step size, in case the running average is
        really small. Needs to be greater than 0. Defaults to 1e5.

    Notes
    -----
    This step rule is intended to be used in conjunction with another
    step rule, _e.g._ :class:`Scale`. For an all-batteries-included
    experience, look at :class:`RMSProp`.

    In general, this step rule should be used _before_ other step rules,
    because it has normalization properties that may undo their work.
    For instance, it should be applied first when used in conjunction
    with :class:`Scale`.

    For more information, see [Hint2014]_.

    """
    def __init__(self, decay_rate=0.9, max_scaling=1e5):
        if not 0.0 <= decay_rate <= 1.0:
            raise ValueError("decay rate needs to be in [0, 1]")
        if max_scaling <= 0:
            raise ValueError("max. scaling needs to be greater than 0")
        self.decay_rate = shared_floatx(decay_rate, "decay_rate")
        add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
        self.epsilon = 1. / max_scaling

    def compute_step(self, parameter, previous_step):
        mean_square_step_tm1 = _create_algorithm_buffer_for(
            parameter, "mean_square_step_tm1")
        mean_square_step_t = (
            self.decay_rate * mean_square_step_tm1 +
            (1 - self.decay_rate) * tensor.sqr(previous_step))
        rms_step_t = tensor.maximum(
            tensor.sqrt(mean_square_step_t), self.epsilon)
        step = previous_step / rms_step_t
        updates = [(mean_square_step_tm1, mean_square_step_t)]
        return step, updates
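The duplicated compute_step above keeps an exponential running average of the squared step and divides the incoming step by its root. A minimal NumPy sketch of that arithmetic follows; NumPy, the helper name basic_rmsprop_step, and the toy inputs are illustrative assumptions, since the real method builds symbolic Theano expressions and returns them together with a list of shared-variable updates.

# Illustrative NumPy sketch of the BasicRMSProp arithmetic; plain arrays
# stand in for the Theano shared variables used in the quoted code.
import numpy as np

def basic_rmsprop_step(previous_step, mean_square_tm1,
                       decay_rate=0.9, max_scaling=1e5):
    epsilon = 1.0 / max_scaling
    # Exponential running average of the squared step.
    mean_square_t = (decay_rate * mean_square_tm1 +
                     (1 - decay_rate) * np.square(previous_step))
    # Clip the root mean square from below so a very small average cannot
    # scale the step up by more than max_scaling.
    rms = np.maximum(np.sqrt(mean_square_t), epsilon)
    return previous_step / rms, mean_square_t

step, running_avg = basic_rmsprop_step(np.array([0.5, -2.0, 0.01]),
                                       np.zeros(3))

Because the step comes back divided by its own running magnitude, a separate Scale rule is expected to set the actual learning rate afterwards, which is what the Notes section above recommends. The quoted source continues with the RMSProp wrapper class: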

class RMSProp(CompositeRule):
    """Scales the step size by a running average of the recent step norms.

    Combines :class:`BasicRMSProp` and :class:`Scale` to form the step rule
    described in [Hint2014]_.

    .. [Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*,
       lecture 6a,
       http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
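The RMSProp class quoted above packages BasicRMSProp and Scale into one CompositeRule. A short usage sketch may help; the keyword arguments below are assumptions inferred from the code shown in this report and the classes it names, not a verbatim excerpt of the library.

# Hedged sketch: composing the rules by hand versus using the packaged
# RMSProp class. Keyword names are assumptions based on the quoted code.
from blocks.algorithms import BasicRMSProp, CompositeRule, RMSProp, Scale

# BasicRMSProp goes first so that the later scaling does not undo its
# normalization, as the Notes section above recommends.
manual_rule = CompositeRule([BasicRMSProp(decay_rate=0.9, max_scaling=1e5),
                             Scale(learning_rate=0.01)])

# The "all-batteries-included" equivalent referred to in the docstring.
packaged_rule = RMSProp(learning_rate=0.01, decay_rate=0.9)

The second flagged location, quoted below, is the AdaGrad rule.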
@@ 725-765 (lines=41) @@
        Step size.
        Default value is set to 0.002.
    epsilon : float, optional
        Stabilizing constant for one over root of sum of squares.
        Defaults to 1e-6.

    Notes
    -----
    For more information, see [ADAGRAD]_.

    .. [ADAGRAD] Duchi J, Hazan E, Singer Y.,
       *Adaptive subgradient methods for online learning and
        stochastic optimization*,
       http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf

    """
    def __init__(self, learning_rate=0.002, epsilon=1e-6):
        self.learning_rate = shared_floatx(learning_rate, "learning_rate")
        self.epsilon = shared_floatx(epsilon, "epsilon")
        add_role(self.learning_rate, ALGORITHM_HYPERPARAMETER)
        add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)

    def compute_step(self, parameter, previous_step):
        name = 'adagrad_sqs'
        if parameter.name:
            name += '_' + parameter.name
        ssq = _create_algorithm_buffer_for(parameter, name=name)

        ssq_t = (tensor.sqr(previous_step) + ssq)
        step = (self.learning_rate * previous_step /
                (tensor.sqrt(ssq_t) + self.epsilon))

        updates = [(ssq, ssq_t)]

        return step, updates
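AdaGrad's compute_step follows the same shape as BasicRMSProp's above: create a per-parameter buffer, combine it with the squared step, divide, and return the step together with the buffer update. That shared structure is presumably what the duplication detector is reporting. A minimal NumPy sketch of the arithmetic, under the same stand-in assumptions as before:

# Illustrative NumPy sketch of the AdaGrad arithmetic; a plain array
# stands in for the Theano shared variable 'ssq' used in the quoted code.
import numpy as np

def adagrad_step(previous_step, ssq, learning_rate=0.002, epsilon=1e-6):
    # Unlike BasicRMSProp's decaying average, this accumulator only grows,
    # so the effective step size shrinks as training proceeds.
    ssq_t = ssq + np.square(previous_step)
    step = learning_rate * previous_step / (np.sqrt(ssq_t) + epsilon)
    return step, ssq_t

step, ssq = adagrad_step(np.array([0.5, -2.0, 0.01]), np.zeros(3))

The quoted range then continues into the Adam class: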

class Adam(StepRule):
    """Adam optimizer as described in [King2014]_.

    .. [King2014] Diederik Kingma, Jimmy Ba,
       *Adam: A Method for Stochastic Optimization*,
       http://arxiv.org/abs/1412.6980

    Parameters
    ----------
    learning_rate : float, optional