1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
|
|
from . import NeuralLayer |
5
|
|
|
from neural_var import NeuralVariable |
6
|
|
|
from deepy.utils import build_activation, FLOATX, XavierGlorotInitializer, OrthogonalInitializer, Scanner, neural_computation |
7
|
|
|
import numpy as np |
8
|
|
|
import theano.tensor as T |
9
|
|
|
from abc import ABCMeta, abstractmethod |
10
|
|
|
|
11
|
|
|
# Values accepted for the ``output_type`` constructor argument of recurrent layers.
OUTPUT_TYPES = ["sequence", "one"]
# Values accepted for the ``input_type`` constructor argument of recurrent layers.
INPUT_TYPES = ["sequence", "one"]
13
|
|
|
|
14
|
|
|
|
15
|
|
|
|
16
|
|
|
class RecurrentLayer(NeuralLayer):
    """
    Abstract base class for recurrent layers.

    The recurrence itself is run by ``Scanner`` in :meth:`compute_tensor`, which calls
    :meth:`step` once per time step.  Subclasses must implement
    :meth:`compute_new_state`, :meth:`merge_inputs` and :meth:`prepare`.
    """
    __metaclass__ = ABCMeta

    def __init__(self, name, state_names, hidden_size=100, input_type="sequence", output_type="sequence",
                 inner_init=None, outer_init=None,
                 gate_activation='sigmoid', activation='tanh',
                 steps=None, backward=False, mask=None,
                 additional_input_dims=None):
        """
        :param name: layer name, forwarded to the ``NeuralLayer`` constructor
        :param state_names: names of the recurrent states; the first one is the main state
        :param hidden_size: size of each state vector
        :param input_type: one of ``INPUT_TYPES``
        :param output_type: one of ``OUTPUT_TYPES``
        :param inner_init: initializer, defaults to ``OrthogonalInitializer()``
        :param outer_init: initializer, defaults to ``XavierGlorotInitializer()``
        :param gate_activation: activation name passed to ``build_activation``
        :param activation: activation name passed to ``build_activation``
        :param steps: default number of scan steps (used when ``compute_tensor`` gets none)
        :param backward: default scan direction (used when ``compute_tensor`` gets a falsy value)
        :param mask: default mask; a ``NeuralVariable`` is unwrapped to its tensor
        :param additional_input_dims: dimensions of the additional inputs, if any
        :raises Exception: if ``input_type`` or ``output_type`` is not an allowed value
        """
        super(RecurrentLayer, self).__init__(name)
        self.state_names = state_names
        self.main_state = state_names[0]
        self.hidden_size = hidden_size
        self._gate_activation = gate_activation
        self._activation = activation
        self.gate_activate = build_activation(self._gate_activation)
        self.activate = build_activation(self._activation)
        self._input_type = input_type
        self._output_type = output_type
        self.inner_init = inner_init if inner_init else OrthogonalInitializer()
        self.outer_init = outer_init if outer_init else XavierGlorotInitializer()
        self._steps = steps
        # isinstance (rather than an exact type comparison) also unwraps subclasses.
        self._mask = mask.tensor if isinstance(mask, NeuralVariable) else mask
        self._go_backwards = backward
        # Copy into a fresh list so the caller's sequence is never aliased and so
        # list concatenation in subclasses works for tuples too.
        self.additional_input_dims = list(additional_input_dims) if additional_input_dims else []

        if input_type not in INPUT_TYPES:
            raise Exception("Input type of {} is wrong: {}".format(name, input_type))
        if output_type not in OUTPUT_TYPES:
            raise Exception("Output type of {} is wrong: {}".format(name, output_type))

    @neural_computation
    def step(self, step_inputs):
        """
        Compute the states for one time step and apply the step mask when one is given.

        :type step_inputs: dict
        :rtype: dict
        """
        new_states = self.compute_new_state(step_inputs)

        # Whenever a mask is supplied: positions where the mask is 1 take the newly
        # computed state, positions where it is 0 keep the previous state.
        # Presence is tested with ``is not None`` because the truth value of a
        # symbolic variable is not a reliable presence check.
        mask = step_inputs.get("mask")
        if mask is not None:
            mask = mask.dimshuffle(0, 'x')
            for state_name in new_states:
                new_states[state_name] = new_states[state_name] * mask + step_inputs[state_name] * (1 - mask)

        return new_states

    @abstractmethod
    def compute_new_state(self, step_inputs):
        """
        Compute the new states from the inputs of a single step.

        :type step_inputs: dict
        :rtype: dict
        """

    @abstractmethod
    def merge_inputs(self, input_var, additional_inputs=None):
        """
        Merge inputs and return a map whose entries are added to the step inputs.

        :type input_var: T.var
        :param additional_inputs: list
        :rtype: dict
        """

    @abstractmethod
    def prepare(self):
        """
        Set up the layer; implemented by subclasses.
        """
        pass

    @neural_computation
    def get_initial_states(self, input_var):
        """
        Build a zero-filled initial value for every state in ``state_names``.

        For sequence input with a 2-dimensional ``input_var`` each state has shape
        ``(hidden_size,)``; otherwise it has shape ``(input_var.shape[0], hidden_size)``.

        :type input_var: T.var
        :rtype: dict
        """
        zero = np.cast[FLOATX](0.)
        # Loop-invariant: decided once for all states.
        unbatched = self._input_type == 'sequence' and input_var.ndim == 2
        initial_states = {}
        for state in self.state_names:
            if unbatched:
                init_state = T.alloc(zero, self.hidden_size)
            else:
                init_state = T.alloc(zero, input_var.shape[0], self.hidden_size)
            initial_states[state] = init_state
        return initial_states

    @neural_computation
    def get_step_inputs(self, input_var, states=None, mask=None, additional_inputs=None):
        """
        Assemble the map of per-step inputs.

        For sequence input the map holds the transposed mask (when given) plus
        whatever ``merge_inputs`` returns.  For other input types ``merge_inputs`` is
        only called when additional inputs exist, and the mask is not used.  When
        ``states`` is given, every name in ``state_names`` is copied from it.

        :type input_var: T.var
        :rtype: dict
        """
        step_inputs = {}
        if self._input_type == "sequence":
            if not additional_inputs:
                additional_inputs = []
            if mask is not None:
                # Swap the two mask axes so the first axis matches the scanned axis.
                step_inputs['mask'] = mask.dimshuffle(1, 0)
            step_inputs.update(self.merge_inputs(input_var, additional_inputs=additional_inputs))
        else:
            if additional_inputs:
                step_inputs.update(self.merge_inputs(None, additional_inputs=additional_inputs))
        if states:
            for name in self.state_names:
                step_inputs[name] = states[name]

        return step_inputs

    def compute(self, input_var, mask=None, additional_inputs=None, steps=None, backward=False):
        """
        Record the dimensions of the additional inputs (if not already known) and
        delegate to ``NeuralLayer.compute``.
        """
        if additional_inputs and not self.additional_input_dims:
            # A real list (not a lazy ``map`` object) so it can be concatenated later.
            self.additional_input_dims = [var.dim() for var in additional_inputs]
        return super(RecurrentLayer, self).compute(input_var, mask=mask, additional_inputs=additional_inputs,
                                                   steps=steps, backward=backward)

    def compute_tensor(self, input_var, mask=None, additional_inputs=None, steps=None, backward=False):
        """
        Run the recurrence over ``input_var`` and return the main state.

        Arguments left at their defaults fall back to the values given to the
        constructor.  With ``output_type == "one"`` the last element of the scanned
        main state is returned; with ``"sequence"`` the whole scanned main state is
        returned, with its first two axes swapped when it is 3-dimensional.

        :raises Exception: if a mask is given while ``input_type`` is ``"one"``
        """
        # prepare parameters
        backward = backward or self._go_backwards
        steps = steps or self._steps
        if mask is None:
            mask = self._mask
        if mask is not None and self._input_type == "one":
            raise Exception("Mask only works with sequence input")
        # get initial states
        init_state_map = self.get_initial_states(input_var)
        # get input sequence map
        if self._input_type == "sequence":
            # Move middle dimension to left-most position
            # (sequence, batch, value)
            if input_var.ndim == 3:
                input_var = input_var.dimshuffle((1, 0, 2))

            seq_map = self.get_step_inputs(input_var, mask=mask, additional_inputs=additional_inputs)
        else:
            # A single input is used directly as the initial main state.
            init_state_map[self.main_state] = input_var
            seq_map = self.get_step_inputs(None, mask=mask, additional_inputs=additional_inputs)
        # scan
        retval_map, _ = Scanner(
            self.step,
            sequences=seq_map,
            outputs_info=init_state_map,
            n_steps=steps,
            go_backwards=backward
        ).compute()
        # return main states
        main_states = retval_map[self.main_state]
        if self._output_type == "one":
            return main_states[-1]
        elif self._output_type == "sequence":
            # Swap the first two axes back (~ batch, time, size).  A 2-dimensional
            # result (unbatched sequence input) has no such pair of axes to swap,
            # so it is returned as is instead of failing in dimshuffle.
            if main_states.ndim == 3:
                main_states = main_states.dimshuffle((1, 0, 2))
            return main_states
158
|
|
|
|
159
|
|
|
|
160
|
|
|
class RNN(RecurrentLayer):
    """
    Plain recurrent layer with a single state named ``"state"``.

    Each step computes ``activate(xh_t + dot(h_tm1, W_h) + b_h)`` where ``xh_t`` is
    the sum of all inputs projected by their own weight matrices.
    """

    def __init__(self, hidden_size, **kwargs):
        """
        :param hidden_size: size of the state vector
        :param kwargs: forwarded unchanged to ``RecurrentLayer.__init__``
        """
        kwargs["hidden_size"] = hidden_size
        super(RNN, self).__init__("RNN", ["state"], **kwargs)

    @neural_computation
    def compute_new_state(self, step_inputs):
        """
        Compute the next state from the projected input and the previous state.

        :type step_inputs: dict
        :rtype: dict
        """
        xh_t = step_inputs.get("xh_t")
        h_tm1 = step_inputs.get("state")
        # No projected input for this step: contribute nothing to the sum.
        # ``is None`` is used because the truth value of a symbolic variable is
        # not a reliable presence check.
        if xh_t is None:
            xh_t = 0

        h_t = self.activate(xh_t + T.dot(h_tm1, self.W_h) + self.b_h)

        return {"state": h_t}

    @neural_computation
    def merge_inputs(self, input_var, additional_inputs=None):
        """
        Project every input with its weight matrix and sum the projections.

        Inputs are paired with ``self.input_weights`` in order: ``input_var`` first
        (when given), then the additional inputs.

        :type input_var: T.var
        :param additional_inputs: list
        :rtype: dict
        """
        # list(...) lets callers pass any sequence, not only a list.
        extra_inputs = list(additional_inputs) if additional_inputs else []
        all_inputs = ([input_var] if input_var is not None else []) + extra_inputs
        h_inputs = [T.dot(x, wi) for x, (wi,) in zip(all_inputs, self.input_weights)]
        merged_inputs = {
            "xh_t": sum(h_inputs)
        }
        return merged_inputs

    def prepare(self):
        """
        Create and register the recurrent weight, the bias and one input weight
        matrix per input dimension.
        """
        self.output_dim = self.hidden_size

        # NOTE(review): W_h is initialized with outer_init and inner_init is never
        # used in this class - confirm this is intended.
        self.W_h = self.create_weight(self.hidden_size, self.hidden_size, "h", initializer=self.outer_init)
        self.b_h = self.create_bias(self.hidden_size, "h")

        self.register_parameters(self.W_h, self.b_h)

        self.input_weights = []
        # Only sequence input is projected through an input weight; for other input
        # types just the additional inputs get weights.
        if self._input_type == "sequence":
            normal_input_dims = [self.input_dim]
        else:
            normal_input_dims = []

        all_input_dims = normal_input_dims + self.additional_input_dims
        for i, input_dim in enumerate(all_input_dims):
            wi = self.create_weight(input_dim, self.hidden_size, "wi_{}".format(i + 1), initializer=self.outer_init)
            weights = [wi]
            self.input_weights.append(weights)
            self.register_parameters(*weights)