#!/usr/bin/env python
# -*- coding: utf-8 -*-
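# Recurrent layers: an abstract RecurrentLayer base class that drives
# theano.scan through deepy's Scanner helper, plus a plain (vanilla) RNN.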

from . import NeuralLayer
from .neural_var import NeuralVariable
from deepy.utils import build_activation, FLOATX, XavierGlorotInitializer, OrthogonalInitializer, Scanner, neural_computation
import numpy as np
import theano.tensor as T
from abc import ABCMeta, abstractmethod

OUTPUT_TYPES = ["sequence", "one"]
INPUT_TYPES = ["sequence", "one"]
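# "sequence" values are laid out (batch, time, value) at the layer boundary
# (a 2-d (time, value) sequence without a batch axis is also accepted);
# "one" values are single (batch, value) tensors.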


class RecurrentLayer(NeuralLayer):
    __metaclass__ = ABCMeta
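    # Subclasses provide prepare() to create parameters, merge_inputs() to
    # project raw inputs into per-step tensors, and compute_new_state() for
    # the recurrent update itself.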

    def __init__(self, name, state_names, hidden_size=100, input_type="sequence", output_type="sequence",
                 inner_init=None, outer_init=None,
                 gate_activation='sigmoid', activation='tanh',
                 steps=None, backward=False, mask=None,
                 additional_input_dims=None):
        super(RecurrentLayer, self).__init__(name)
        self.state_names = state_names
        self.main_state = state_names[0]
        self.hidden_size = hidden_size
        self._gate_activation = gate_activation
        self._activation = activation
        self.gate_activate = build_activation(self._gate_activation)
        self.activate = build_activation(self._activation)
        self._input_type = input_type
        self._output_type = output_type
        self.inner_init = inner_init if inner_init else OrthogonalInitializer()
        self.outer_init = outer_init if outer_init else XavierGlorotInitializer()
        self._steps = steps
        self._mask = mask.tensor if isinstance(mask, NeuralVariable) else mask
        self._go_backwards = backward
        self.additional_input_dims = additional_input_dims if additional_input_dims else []

        if input_type not in INPUT_TYPES:
            raise ValueError("Input type of {} is wrong: {}".format(name, input_type))
        if output_type not in OUTPUT_TYPES:
            raise ValueError("Output type of {} is wrong: {}".format(name, output_type))

    @neural_computation
    def step(self, step_inputs):
        new_states = self.compute_new_state(step_inputs)

        # Where the mask is 0 (padding), carry the previous state forward
        # instead of the freshly computed one, so padded steps are no-ops.
        if step_inputs.get("mask") is not None:
            mask = step_inputs["mask"].dimshuffle(0, 'x')
            for state_name in new_states:
                new_states[state_name] = new_states[state_name] * mask + step_inputs[state_name] * (1 - mask)

        return new_states

    @abstractmethod
    def compute_new_state(self, step_inputs):
        """
        Compute the new states from this step's inputs and the previous states.
        :type step_inputs: dict
        :rtype: dict
        """

    @abstractmethod
    def merge_inputs(self, input_var, additional_inputs=None):
        """
        Merge inputs and return a map, which will be passed to step().
        :type input_var: T.var
        :param additional_inputs: list
        :rtype: dict
        """

    @abstractmethod
    def prepare(self):
        """
        Create the parameters of the layer and set self.output_dim.
        """

    @neural_computation
    def get_initial_states(self, input_var):
        """
        :type input_var: T.var
        :rtype: dict
        """
        initial_states = {}
        for state in self.state_names:
            if self._input_type == 'sequence' and input_var.ndim == 2:
                # Unbatched (time, value) sequence: states are plain vectors.
                init_state = T.alloc(np.cast[FLOATX](0.), self.hidden_size)
            else:
                init_state = T.alloc(np.cast[FLOATX](0.), input_var.shape[0], self.hidden_size)
            initial_states[state] = init_state
        return initial_states

    @neural_computation
    def get_step_inputs(self, input_var, states=None, mask=None, additional_inputs=None):
        """
        :type input_var: T.var
        :rtype: dict
        """
        step_inputs = {}
        if self._input_type == "sequence":
            if not additional_inputs:
                additional_inputs = []
            if mask is not None:
                # Scan iterates over the first axis, so make the mask (time, batch).
                step_inputs['mask'] = mask.dimshuffle(1, 0)
            step_inputs.update(self.merge_inputs(input_var, additional_inputs=additional_inputs))
        else:
            # step_inputs["mask"] = mask.dimshuffle((1,0)) if mask else None
            if additional_inputs:
                step_inputs.update(self.merge_inputs(None, additional_inputs=additional_inputs))
        if states:
            for name in self.state_names:
                step_inputs[name] = states[name]

        return step_inputs

    def compute(self, input_var, mask=None, additional_inputs=None, steps=None, backward=False, init_states=None, return_all_states=False):
        if additional_inputs and not self.additional_input_dims:
            self.additional_input_dims = [var.dim() for var in additional_inputs]
        result_var = super(RecurrentLayer, self).compute(
            input_var, mask=mask, additional_inputs=additional_inputs, steps=steps,
            backward=backward, init_states=init_states, return_all_states=return_all_states)
        if return_all_states:
            # Wrap each returned state tensor back into a NeuralVariable.
            state_map = {}
            for k in result_var.tensor:
                state_map[k] = NeuralVariable(result_var.tensor[k], result_var.test_tensor[k], self.output_dim)
            return state_map
        else:
            return result_var

    def compute_tensor(self, input_var, mask=None, additional_inputs=None, steps=None, backward=False, init_states=None, return_all_states=False):
        # prepare parameters
        backward = backward if backward else self._go_backwards
        steps = steps if steps else self._steps
        mask = mask if mask is not None else self._mask
        if mask is not None and self._input_type == "one":
            raise ValueError("Mask only works with sequence input")
        # get initial states
        init_state_map = self.get_initial_states(input_var)
        if init_states:
            for name, val in init_states.items():
                if name in init_state_map:
                    init_state_map[name] = val
        # get input sequence map
        if self._input_type == "sequence":
            # Scan iterates over the left-most axis, so move the time axis
            # to the front: (batch, time, value) -> (time, batch, value).
            if input_var.ndim == 3:
                input_var = input_var.dimshuffle((1, 0, 2))

            seq_map = self.get_step_inputs(input_var, mask=mask, additional_inputs=additional_inputs)
        else:
            # A "one" input becomes the initial main state.
            init_state_map[self.main_state] = input_var
            seq_map = self.get_step_inputs(None, mask=mask, additional_inputs=additional_inputs)
        # scan
        retval_map, _ = Scanner(
            self.step,
            sequences=seq_map,
            outputs_info=init_state_map,
            n_steps=steps,
            go_backwards=backward
        ).compute()
        # return main states
        main_states = retval_map[self.main_state]
        if self._output_type == "one":
            if return_all_states:
                return_map = {}
                for name, val in retval_map.items():
                    return_map[name] = val[-1]
                return return_map
            else:
                return main_states[-1]
        elif self._output_type == "sequence":
            if return_all_states:
                return_map = {}
                for name, val in retval_map.items():
                    return_map[name] = val.dimshuffle((1, 0, 2))
                return return_map
            else:
                main_states = main_states.dimshuffle((1, 0, 2))  # ~ batch, time, size
                # if mask: # ~ batch, time
                #    main_states *= mask.dimshuffle((0, 1, 'x'))
                return main_states


class RNN(RecurrentLayer):

    def __init__(self, hidden_size, **kwargs):
        kwargs["hidden_size"] = hidden_size
        super(RNN, self).__init__("RNN", ["state"], **kwargs)
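
    # One step of the vanilla RNN recurrence:
    #   h_t = activate(x_t . W_i + h_{t-1} . W_h + b_h)
    # where the projected input x_t . W_i arrives pre-computed as "xh_t"
    # from merge_inputs().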
    @neural_computation
    def compute_new_state(self, step_inputs):
        xh_t, h_tm1 = map(step_inputs.get, ["xh_t", "state"])
        if xh_t is None:
            # No sequence input (input_type "one"): only the recurrent term.
            xh_t = 0

        h_t = self.activate(xh_t + T.dot(h_tm1, self.W_h) + self.b_h)

        return {"state": h_t}

    @neural_computation
    def merge_inputs(self, input_var, additional_inputs=None):
        if not additional_inputs:
            additional_inputs = []
        all_inputs = ([input_var] if input_var is not None else []) + additional_inputs
        h_inputs = []
        for x, weights in zip(all_inputs, self.input_weights):
            wi, = weights
            h_inputs.append(T.dot(x, wi))
        merged_inputs = {
            "xh_t": sum(h_inputs)
        }
        return merged_inputs

    def prepare(self):
        self.output_dim = self.hidden_size

        self.W_h = self.create_weight(self.hidden_size, self.hidden_size, "h", initializer=self.outer_init)
        self.b_h = self.create_bias(self.hidden_size, "h")

        self.register_parameters(self.W_h, self.b_h)

        # One projection weight per input: the normal sequence input first,
        # then any additional inputs.
        self.input_weights = []
        if self._input_type == "sequence":
            normal_input_dims = [self.input_dim]
        else:
            normal_input_dims = []

        all_input_dims = normal_input_dims + self.additional_input_dims
        for i, input_dim in enumerate(all_input_dims):
            wi = self.create_weight(input_dim, self.hidden_size, "wi_{}".format(i + 1), initializer=self.outer_init)
            weights = [wi]
            self.input_weights.append(weights)
            self.register_parameters(*weights)
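
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the library). It assumes the surrounding
# deepy graph API: NeuralVariable wrapping a symbolic tensor plus a test
# tensor and a dimension (mirroring the constructor call in compute() above),
# and NeuralLayer taking care of initialization, i.e. calling prepare(),
# when the layer is first connected.
#
#   x = NeuralVariable(T.tensor3("x"), T.tensor3("x_test"), 50)
#   rnn = RNN(hidden_size=100, input_type="sequence", output_type="sequence")
#   h = rnn.compute(x)                             # ~ (batch, time, 100)
#   last = RNN(100, output_type="one").compute(x)  # ~ (batch, 100)
# ---------------------------------------------------------------------------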