experiments.attention_models.AttentionLayer._setup_params() - Code Metrics - uaca/deepy - Measure and Improve Code Quality continuously with Scrutinizer

_setup_params() B
last analyzed 2016-01-12 06:29 UTC

↳ Parent: experiments.attention_models.AttentionLayer

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	1
dl	0
loc	31
rs	8.8571

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

import numpy as np
from numpy import linalg as LA
from theano import tensor as T
import theano
from theano.tensor.shared_randomstreams import RandomStreams

from deepy import NeuralClassifier, NetworkConfig
from deepy.utils import build_activation, disconnected_grad
from deepy.utils.functions import FLOATX
from deepy.networks import NeuralLayer
from experiments.attention_models.gaussian_sampler import SampleMultivariateGaussian


class AttentionLayer(NeuralLayer):
    """
    Recurrent attention layer over a single 28x28 image.

    For a fixed number of steps the layer crops a small patch ("glimpse") of the
    image around a focus point, updates a 256-unit hidden state from it, and
    proposes the next focus point.  The class scores computed from the last
    hidden state are the layer output.
    """

    def __init__(self, activation='relu', std=0.1, disable_reinforce=False, random_glimpse=False):
        """
        Parameters:
            activation - forwarded unchanged to NeuralLayer.__init__
            std - value placed on the diagonal of the 2x2 covariance used when sampling focus points
            disable_reinforce - if True, focus points are used as predicted (no sampling, no log-pdf gradient)
            random_glimpse - if True (and disable_reinforce is True), focus points are drawn uniformly at random
        """
        self.disable_reinforce = disable_reinforce
        self.random_glimpse = random_glimpse
        self.gaussian_std = std
        super(AttentionLayer, self).__init__(10, activation)

    def connect(self, config, vars, x, input_n, id="UNKNOWN"):
        """
        Store the connection arguments on the layer, create its parameters and build its output expression.
        Parameters:
            config - stored as self._config
            vars - stored as self._vars
            x - input variable; reshaped to 28x28 when the output expression is built
            input_n - stored as self.input_n
            id - stored as self.id
        """
        self._config = config
        self._vars = vars
        self.input_n = input_n
        self.id = id
        self.x = x
        # Parameters must exist before the functions that reference them are built.
        self._setup_params()
        self._setup_functions()
        self.connected = True

    @staticmethod
    def _bound_corner(corner, limit):
        """
        Force every entry of an integer corner vector into [0, limit - 1].
        Parameters:
            corner - integer vector of patch corner coordinates
            limit - exclusive upper bound
        Returns:
            vector with negative entries set to 0 and entries >= limit set to limit - 1
        """
        # Multiplying by the comparison mask zeroes out the negative entries.
        corner = corner * (corner >= 0)
        # Entries below the limit are kept; the rest are replaced by limit - 1.
        return corner * (corner < limit) + (corner >= limit) * (limit - 1)

    def _glimpse_sensor(self, x_t, l_p):
        """
        Multi-scale sensor: crops 4x4, 8x8 and 16x16 windows and max-pools the
        two larger ones by 2 and 4 respectively.
        Parameters:
            x_t - 28x28 image
            l_p - 2x1 focus vector
        Returns:
            the three patches concatenated along the first axis (nominally a 12x4 matrix)
        """
        # Imported explicitly: the module only does `import theano`, which is
        # not guaranteed to load the `signal.downsample` submodule.
        from theano.tensor.signal import downsample

        # Turn l_p to the left-top point of rectangle
        l_p = l_p * 14 + 14 - 2
        l_p = T.cast(T.round(l_p), "int32")

        l_p = self._bound_corner(l_p, 24)
        # The larger windows are shifted up/left relative to the (bounded) small one.
        l_p2 = self._bound_corner(l_p - 2, 20)
        l_p3 = self._bound_corner(l_p - 6, 16)

        glimpse_1 = x_t[l_p[0]: l_p[0] + 4][:, l_p[1]: l_p[1] + 4]
        glimpse_2 = x_t[l_p2[0]: l_p2[0] + 8][:, l_p2[1]: l_p2[1] + 8]
        glimpse_2 = downsample.max_pool_2d(glimpse_2, (2, 2))
        glimpse_3 = x_t[l_p3[0]: l_p3[0] + 16][:, l_p3[1]: l_p3[1] + 16]
        glimpse_3 = downsample.max_pool_2d(glimpse_3, (4, 4))
        return T.concatenate([glimpse_1, glimpse_2, glimpse_3])

    def _refined_glimpse_sensor(self, x_t, l_p):
        """
        Single-scale sensor: crops one 7x7 window of the image.
        Parameters:
            x_t - 28x28 image
            l_p - 2x1 focus vector
        Returns:
            7x7 matrix
        """
        # Turn l_p to the left-top point of rectangle
        l_p = l_p * 14 + 14 - 4
        l_p = T.cast(T.round(l_p), "int32")

        l_p = self._bound_corner(l_p, 21)
        return x_t[l_p[0]: l_p[0] + 7][:, l_p[1]: l_p[1] + 7]

    def _multi_gaussian_pdf(self, vec, mean):
        """
        Density of a 2-d Gaussian, using the stored inverse and determinant of the covariance.
        Parameters:
            vec - 2x1 point at which the density is evaluated
            mean - 2x1 mean vector
        Returns:
            scalar density value
        """
        diff = vec - mean
        # 1 / sqrt((2*pi)^2 * det(cov)) is the normalizer for two dimensions.
        normalizer = 1.0 / T.sqrt((2*np.pi)**2 * self.cov_det_var)
        norm2d_var = normalizer * T.exp(-0.5 * (diff.T.dot(self.cov_inv_var).dot(diff)))
        return norm2d_var

    def _glimpse_network(self, x_t, l_p):
        """
        Combine the glimpse patch and the focus vector into one feature vector.
        Parameters:
            x_t - 28x28 image
            l_p - 2x1 focus vector
        Returns:
            256x1 vector
        """
        sensor_output = self._refined_glimpse_sensor(x_t, l_p)
        sensor_output = T.flatten(sensor_output)
        h_g = self._relu(T.dot(sensor_output, self.W_g0))
        h_l = self._relu(T.dot(l_p, self.W_g1))
        g = self._relu(T.dot(h_g, self.W_g2_hg) + T.dot(h_l, self.W_g2_hl))
        return g

    def _location_network(self, h_t):
        """
        Parameters:
            h_t - 256x1 vector
        Returns:
            2x1 focus vector
        """
        # NOTE(review): B_l is created in _setup_params but not added here - confirm intent.
        return T.dot(h_t, self.W_l)

    def _action_network(self, h_t):
        """
        Parameters:
            h_t - 256x1 vector
        Returns:
            10x1 vector
        """
        z = self._relu(T.dot(h_t, self.W_a) + self.B_a)
        return self._softmax(z)

    def _core_network(self, l_p, h_p, x_t):
        """
        One recurrent step; used as the step function of theano.scan.
        Parameters:
            l_p - 2x1 focus vector from the previous step
            h_p - 256x1 hidden vector from the previous step
            x_t - 28x28 image
        Returns:
            sampled_l_t - 2x1 focus vector for the next step
            h_t - 256x1 vector
            a_t - 10x1 vector from the action network
            wl_grad - gradient of the log-density of the sampled focus w.r.t. W_l
                      (W_l itself when disable_reinforce is set)
        """
        g_t = self._glimpse_network(x_t, l_p)
        h_t = self._tanh(T.dot(g_t, self.W_h_g) + T.dot(h_p, self.W_h) + self.B_h)
        l_t = self._location_network(h_t)

        if not self.disable_reinforce:
            sampled_l_t = self._sample_gaussian(l_t, self.cov)
            # The sample is detached so the gradient flows only through the mean l_t.
            sampled_pdf = self._multi_gaussian_pdf(disconnected_grad(sampled_l_t), l_t)
            wl_grad = T.grad(T.log(sampled_pdf), self.W_l)
        else:
            sampled_l_t = l_t
            # Presumably a same-shaped placeholder so scan still has a fourth output.
            wl_grad = self.W_l

        if self.random_glimpse and self.disable_reinforce:
            sampled_l_t = self.srng.uniform((2,)) * 0.8

        a_t = self._action_network(h_t)

        return sampled_l_t, h_t, a_t, wl_grad

    def _output_func(self):
        """
        Unroll the core network for 5 steps and build the layer output.
        Also sets self.positions, self.last_decision and self.wl_grad.
        Returns:
            1x10 matrix holding the action-network output of the last step
        """
        self.x = self.x.reshape((28, 28))
        [l_ts, h_ts, a_ts, wl_grads], _ = theano.scan(fn=self._core_network,
                         outputs_info=[self.l0, self.h0, None, None],
                         non_sequences=[self.x],
                         n_steps=5)

        self.positions = l_ts
        self.last_decision = T.argmax(a_ts[-1])
        # Average of the per-step gradients.
        wl_grad = T.sum(wl_grads, axis=0) / wl_grads.shape[0]
        self.wl_grad = wl_grad
        return a_ts[-1].reshape((1, 10))

    def _setup_functions(self):
        """Build the activation functions and the output expression."""
        self._assistive_params = []
        # NOTE(review): despite its name, `_relu` is built as tanh.  Left
        # unchanged so results stay the same - confirm whether this is intended.
        self._relu = build_activation("tanh")
        self._tanh = build_activation("tanh")
        self._softmax = build_activation("softmax")
        self.output_func = self._output_func()

    def _setup_params(self):
        """Create the random stream, the sampling covariance and all weights and biases."""
        self.srng = RandomStreams(seed=234)
        self.large_cov = np.array([[0.06, 0], [0, 0.06]], dtype=FLOATX)
        # gaussian_std is placed directly on the covariance diagonal.
        self.small_cov = np.array([[self.gaussian_std, 0], [0, self.gaussian_std]], dtype=FLOATX)
        self.cov = theano.shared(np.array(self.small_cov, dtype=FLOATX))
        # Inverse and determinant are precomputed once for _multi_gaussian_pdf.
        self.cov_inv_var = theano.shared(np.array(LA.inv(self.small_cov), dtype=FLOATX))
        self.cov_det_var = theano.shared(np.array(LA.det(self.small_cov), dtype=FLOATX))
        self._sample_gaussian = SampleMultivariateGaussian()

        # Glimpse network
        self.W_g0 = self.create_weight(7*7, 128, suffix="g0")
        self.W_g1 = self.create_weight(2, 128, suffix="g1")
        self.W_g2_hg = self.create_weight(128, 256, suffix="g2_hg")
        self.W_g2_hl = self.create_weight(128, 256, suffix="g2_hl")

        # Recurrent core and its initial state
        self.W_h_g = self.create_weight(256, 256, suffix="h_g")
        self.W_h = self.create_weight(256, 256, suffix="h")
        self.B_h = self.create_bias(256, suffix="h")
        self.h0 = self.create_vector(256, "h0")
        self.l0 = self.create_vector(2, "l0")
        self.l0.set_value(np.array([-1, -1], dtype=FLOATX))

        # Location network; its initial weights are scaled down by 10.
        self.W_l = self.create_weight(256, 2, suffix="l")
        self.W_l.set_value(self.W_l.get_value() / 10)
        self.B_l = self.create_bias(2, suffix="l")
        # Action network
        self.W_a = self.create_weight(256, 10, suffix="a")
        self.B_a = self.create_bias(10, suffix="a")

        # W_l is listed separately from the other weights.
        self.W = [self.W_g0, self.W_g1, self.W_g2_hg, self.W_g2_hl, self.W_h_g, self.W_h, self.W_a]
        self.B = [self.B_h, self.B_a]
        self.parameters = [self.W_l]


def get_network(model=None, std=0.005, disable_reinforce=False, random_glimpse=False):
    """
    Build a classifier over 28*28 inputs with a single AttentionLayer stacked on it.
    Parameters:
        model - model path; parameters are loaded from it only if the path exists
        std - forwarded to AttentionLayer
        disable_reinforce - forwarded to AttentionLayer
        random_glimpse - forwarded to AttentionLayer
    Returns:
        network
    """
    network = NeuralClassifier(input_dim=28*28)
    attention = AttentionLayer(
        std=std,
        disable_reinforce=disable_reinforce,
        random_glimpse=random_glimpse,
    )
    network.stack_layer(attention)

    # No path given, or nothing stored there: return the freshly built network.
    if not model or not os.path.exists(model):
        return network

    network.load_params(model)
    return network



1			#!/usr/bin/env python
2			# -*- coding: utf-8 -*-
3			import os
4
5			import numpy as np
6			from numpy import linalg as LA
7			from theano import tensor as T
8			import theano
9			from theano.tensor.shared_randomstreams import RandomStreams
10
11			from deepy import NeuralClassifier, NetworkConfig
12			from deepy.utils import build_activation, disconnected_grad
13			from deepy.utils.functions import FLOATX
14			from deepy.networks import NeuralLayer
15			from experiments.attention_models.gaussian_sampler import SampleMultivariateGaussian
16
17
18			class AttentionLayer(NeuralLayer):
19
20			def __init__(self, activation='relu', std=0.1, disable_reinforce=False, random_glimpse=False):
21			self.disable_reinforce = disable_reinforce
22			self.random_glimpse = random_glimpse
23			self.gaussian_std = std
24			super(AttentionLayer, self).__init__(10, activation)
25
26			def connect(self, config, vars, x, input_n, id="UNKNOWN"):
27			self._config = config
28			self._vars = vars
29			self.input_n = input_n
30			self.id = id
31			self.x = x
32			self._setup_params()
33			self._setup_functions()
34			self.connected = True
35
36			def _glimpse_sensor(self, x_t, l_p):
37			"""
38			Parameters:
39			x_t - 28x28 image
40			l_p - 2x1 focus vector
41			Returns:
42			4x12 matrix
43			"""
44			# Turn l_p to the left-top point of rectangle
45			l_p = l_p * 14 + 14 - 2
46			l_p = T.cast(T.round(l_p), "int32")
47
48			l_p = l_p * (l_p >= 0)
49			l_p = l_p * (l_p < 24) + (l_p >= 24) * 23
50			l_p2 = l_p - 2
51			l_p2 = l_p2 * (l_p2 >= 0)
52			l_p2 = l_p2 * (l_p2 < 20) + (l_p2 >= 20) * 19
53			l_p3 = l_p - 6
54			l_p3 = l_p3 * (l_p3 >= 0)
55			l_p3 = l_p3 * (l_p3 < 16) + (l_p3 >= 16) * 15
56			glimpse_1 = x_t[l_p[0]: l_p[0] + 4][:, l_p[1]: l_p[1] + 4]
57			glimpse_2 = x_t[l_p2[0]: l_p2[0] + 8][:, l_p2[1]: l_p2[1] + 8]
58			glimpse_2 = theano.tensor.signal.downsample.max_pool_2d(glimpse_2, (2,2))
59			glimpse_3 = x_t[l_p3[0]: l_p3[0] + 16][:, l_p3[1]: l_p3[1] + 16]
60			glimpse_3 = theano.tensor.signal.downsample.max_pool_2d(glimpse_3, (4,4))
61			return T.concatenate([glimpse_1, glimpse_2, glimpse_3])
62
63			def _refined_glimpse_sensor(self, x_t, l_p):
			0 ignored issues – show Duplication introduced 2015-12-03 04:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
64			"""
65			Parameters:
66			x_t - 28x28 image
67			l_p - 2x1 focus vector
68			Returns:
69			7*14 matrix
70			"""
71			# Turn l_p to the left-top point of rectangle
72			l_p = l_p * 14 + 14 - 4
73			l_p = T.cast(T.round(l_p), "int32")
74
75			l_p = l_p * (l_p >= 0)
76			l_p = l_p * (l_p < 21) + (l_p >= 21) * 20
77			glimpse_1 = x_t[l_p[0]: l_p[0] + 7][:, l_p[1]: l_p[1] + 7]
78			# glimpse_2 = theano.tensor.signal.downsample.max_pool_2d(x_t, (4,4))
79			# return T.concatenate([glimpse_1, glimpse_2])
80			return glimpse_1
81
82			def _multi_gaussian_pdf(self, vec, mean):
83			norm2d_var = ((1.0 / T.sqrt((2*np.pi)**2 * self.cov_det_var)) *
84			T.exp(-0.5 * ((vec-mean).T.dot(self.cov_inv_var).dot(vec-mean))))
85			return norm2d_var
86
87			def _glimpse_network(self, x_t, l_p):
88			"""
89			Parameters:
90			x_t - 28x28 image
91			l_p - 2x1 focus vector
92			Returns:
93			4x12 matrix
94			"""
95			sensor_output = self._refined_glimpse_sensor(x_t, l_p)
96			sensor_output = T.flatten(sensor_output)
97			h_g = self._relu(T.dot(sensor_output, self.W_g0))
98			h_l = self._relu(T.dot(l_p, self.W_g1))
99			g = self._relu(T.dot(h_g, self.W_g2_hg) + T.dot(h_l, self.W_g2_hl))
100			return g
101
102			def _location_network(self, h_t):
103			"""
104			Parameters:
105			h_t - 256x1 vector
106			Returns:
107			2x1 focus vector
108			"""
109			return T.dot(h_t, self.W_l)
110
111			def _action_network(self, h_t):
112			"""
113			Parameters:
114			h_t - 256x1 vector
115			Returns:
116			10x1 vector
117			"""
118			z = self._relu(T.dot(h_t, self.W_a) + self.B_a)
119			return self._softmax(z)
120
121			def _core_network(self, l_p, h_p, x_t):
122			"""
123			Parameters:
124			x_t - 28x28 image
125			l_p - 2x1 focus vector
126			h_p - 256x1 vector
127			Returns:
128			h_t, 256x1 vector
129			"""
130			g_t = self._glimpse_network(x_t, l_p)
131			h_t = self._tanh(T.dot(g_t, self.W_h_g) + T.dot(h_p, self.W_h) + self.B_h)
132			l_t = self._location_network(h_t)
133
134			if not self.disable_reinforce:
135			sampled_l_t = self._sample_gaussian(l_t, self.cov)
136			sampled_pdf = self._multi_gaussian_pdf(disconnected_grad(sampled_l_t), l_t)
137			wl_grad = T.grad(T.log(sampled_pdf), self.W_l)
138			else:
139			sampled_l_t = l_t
140			wl_grad = self.W_l
141
142			if self.random_glimpse and self.disable_reinforce:
143			sampled_l_t = self.srng.uniform((2,)) * 0.8
144
145			a_t = self._action_network(h_t)
146
147			return sampled_l_t, h_t, a_t, wl_grad
148
149
150			def _output_func(self):
151			self.x = self.x.reshape((28, 28))
152			[l_ts, h_ts, a_ts, wl_grads], _ = theano.scan(fn=self._core_network,
153			outputs_info=[self.l0, self.h0, None, None],
154			non_sequences=[self.x],
155			n_steps=5)
156
157			self.positions = l_ts
158			self.last_decision = T.argmax(a_ts[-1])
159			wl_grad = T.sum(wl_grads, axis=0) / wl_grads.shape[0]
160			self.wl_grad = wl_grad
161			return a_ts[-1].reshape((1,10))
162
163			def _setup_functions(self):
164			self._assistive_params = []
165			self._relu = build_activation("tanh")
166			self._tanh = build_activation("tanh")
167			self._softmax = build_activation("softmax")
168			self.output_func = self._output_func()
169
170			def _setup_params(self):
171			self.srng = RandomStreams(seed=234)
172			self.large_cov = np.array([[0.06,0],[0,0.06]], dtype=FLOATX)
173			self.small_cov = np.array([[self.gaussian_std,0],[0,self.gaussian_std]], dtype=FLOATX)
174			self.cov = theano.shared(np.array(self.small_cov, dtype=FLOATX))
175			self.cov_inv_var = theano.shared(np.array(LA.inv(self.small_cov), dtype=FLOATX))
176			self.cov_det_var = theano.shared(np.array(LA.det(self.small_cov), dtype=FLOATX))
177			self._sample_gaussian = SampleMultivariateGaussian()
178
179			self.W_g0 = self.create_weight(7*7, 128, suffix="g0")
180			self.W_g1 = self.create_weight(2, 128, suffix="g1")
181			self.W_g2_hg = self.create_weight(128, 256, suffix="g2_hg")
182			self.W_g2_hl = self.create_weight(128, 256, suffix="g2_hl")
183
184			self.W_h_g = self.create_weight(256, 256, suffix="h_g")
185			self.W_h = self.create_weight(256, 256, suffix="h")
186			self.B_h = self.create_bias(256, suffix="h")
187			self.h0 = self.create_vector(256, "h0")
188			self.l0 = self.create_vector(2, "l0")
189			self.l0.set_value(np.array([-1, -1], dtype=FLOATX))
190
191			self.W_l = self.create_weight(256, 2, suffix="l")
192			self.W_l.set_value(self.W_l.get_value() / 10)
193			self.B_l = self.create_bias(2, suffix="l")
194			self.W_a = self.create_weight(256, 10, suffix="a")
195			self.B_a = self.create_bias(10, suffix="a")
196
197
198			self.W = [self.W_g0, self.W_g1, self.W_g2_hg, self.W_g2_hl, self.W_h_g, self.W_h, self.W_a]
199			self.B = [self.B_h, self.B_a]
200			self.parameters = [self.W_l]
201
202
203			def get_network(model=None, std=0.005, disable_reinforce=False, random_glimpse=False):
204			"""
205			Get baseline model.
206			Parameters:
207			model - model path
208			Returns:
209			network
210			"""
211			network = NeuralClassifier(input_dim=28*28)
212			network.stack_layer(AttentionLayer(std=std, disable_reinforce=disable_reinforce, random_glimpse=random_glimpse))
213			if model and os.path.exists(model):
214			network.load_params(model)
215			return network
216
217

uaca / deepy

_setup_params() B last analyzed 2016-01-12 06:29 UTC

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like

_setup_params() B
last analyzed 2016-01-12 06:29 UTC