|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- coding: utf-8 -*- |
|
3
|
|
|
|
|
4
|
|
|
import numpy as np |
|
5
|
|
|
from deepy.utils import FLOATX |
|
6
|
|
|
|
|
7
|
|
|
def pad_dataset(subset, side, length):
    """
    Pad (or clip) every sequence of a data set to a common length.

    Parameters:
        subset - sequence of (x, y) pairs; the first element of the first x
                 selects the layout: a plain number means x is a flat
                 sequence (handled by _pad_2d), anything else is handed
                 to _pad_3d
        side - "left" or "right", the side on which zeros are inserted
        length - target length, or -1 to adjust to the longest x in the subset

    Returns:
        A new list of (x, y) pairs; an empty subset yields an empty list.

    Raises:
        ValueError - if length is neither -1 nor greater than zero
    """
    # Raise explicitly: an assert statement is removed when running with -O.
    if not (length == -1 or length > 0):
        raise ValueError("length must be -1 or greater than 0, got %r" % (length,))
    # Nothing to inspect or pad; also avoids indexing into an empty subset.
    if len(subset) == 0:
        return []
    first_value = subset[0][0][0]
    # isinstance instead of an exact type match, so that every numpy scalar
    # width (np.float64, np.int16, ...) is routed to the 2D path as well.
    if isinstance(first_value, (int, float, np.integer, np.floating)):
        return _pad_2d(subset, side, length)
    return _pad_3d(subset, side, length)
|
18
|
|
|
|
|
19
|
|
|
def _pad_sequence(seq, side, target_len):
    """Clip seq to target_len items, or zero-pad it on the given side up to target_len."""
    missing = target_len - len(seq)
    if missing < 0:
        return seq[:target_len]
    if missing == 0:
        # Already the right size; side is irrelevant here and is not checked.
        return seq
    zeros = [0] * missing
    if side == "left":
        return zeros + seq
    if side == "right":
        return seq + zeros
    # Failing loudly beats silently returning a sequence of the wrong length.
    raise ValueError("Side of padding must be 'left' or 'right'")


def _pad_2d(subset, side, length):
    """
    Pad or clip flat sequences so that every x and y has the same length.

    Parameters:
        subset - sequence of (x, y) pairs of lists
        side - "left" or "right", the side on which zeros are inserted
        length - target length, or -1 to use the length of the longest x

    Returns:
        A new list of (x, y) pairs; the input pairs are not modified.

    Raises:
        ValueError - if a sequence needs padding and side is neither
                     "left" nor "right"
    """
    # max() of an empty sequence raises, so handle the empty subset up front.
    if len(subset) == 0:
        return []
    if length == -1:
        max_len = max(len(x) for x, _ in subset)
    else:
        max_len = length
    new_set = []
    for x, y in subset:
        # Targets are aligned to the same length as the inputs.
        y = _pad_sequence(y, side, max_len)
        x = _pad_sequence(x, side, max_len)
        new_set.append((x, y))
    return new_set
|
39
|
|
|
|
|
40
|
|
|
def _pad_3d(subset, side, length):
    """
    Pad or clip every x along its first axis, and every list target y, to a
    common length.

    Parameters:
        subset - sequence of (x, y) pairs; the first row of the first x must
                 expose .shape, its first entry is used as the width of the
                 zero rows that are stacked onto short x values
        side - "left" or "right", the side on which zeros are inserted
        length - target length, or -1 to use the length of the longest x

    Returns:
        A new list of (x, y) pairs. A y that is not a list is passed through
        unchanged.

    Raises:
        ValueError - if an x or y needs padding and side is neither
                     "left" nor "right"
    """
    # Avoid indexing into (and taking max() of) an empty subset.
    if len(subset) == 0:
        return []
    row_size = subset[0][0][0].shape[0]
    if length == -1:
        max_len = max(len(x) for x, _ in subset)
    else:
        max_len = length
    new_set = []
    for x, y in subset:
        if isinstance(y, list):
            # Clip or zero-pad the target vector
            missing = max_len - len(y)
            if missing < 0:
                y = y[:max_len]
            elif missing > 0:
                zeros = [0] * missing
                if side == "left":
                    y = zeros + y
                elif side == "right":
                    y = y + zeros
                else:
                    raise ValueError("Side of padding must be 'left' or 'right'")
        missing = max_len - len(x)
        if missing < 0:
            x = x[:max_len]
        elif missing > 0:
            # Raise (the exception used to be returned to the caller as a
            # value) and check before allocating the padding rows.
            if side not in ("left", "right"):
                raise ValueError("Side of padding must be 'left' or 'right'")
            pad_matrix = np.zeros((missing, row_size), dtype=FLOATX)
            if side == "left":
                x = np.vstack([pad_matrix, x])
            else:
                x = np.vstack([x, pad_matrix])
        new_set.append((x, y))
    return new_set