1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
|
|
import numpy as np |
5
|
|
|
from deepy.core.env import FLOATX |
6
|
|
|
|
7
|
|
|
def pad_dataset(subset, side="right", length=-1):
    """
    Pad (or clip) every sample of a data set to a common length.

    The layout is detected from the first element of the first sample:
    a plain number means each x is a flat sequence of numbers ("2-D" data
    set), anything else is treated as a sequence of row vectors ("3-D"
    data set). The first sample is therefore expected to be non-empty.

    Parameters:
        subset - sequence of (x, y) pairs
        side - "left" or "right", the side on which zeros are added
        length - max length, adjusts to the max length in the batch if
                 length is -1
    Returns:
        A new list of (x, y) pairs; the input pairs are not modified.
    Raises:
        ValueError - if length is neither -1 nor positive, or if side is
                     not "left" or "right"
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if length != -1 and length <= 0:
        raise ValueError("length must be -1 or a positive integer")
    _check_side(side)
    if len(subset) == 0:
        return []
    # isinstance with the abstract NumPy scalar types accepts every integer
    # and floating width (np.float64, np.int16, ...), not just a fixed few.
    if isinstance(subset[0][0][0], (int, float, np.integer, np.floating)):
        return _pad_2d(subset, side, length)
    return _pad_3d(subset, side, length)


def _check_side(side):
    """
    Raise ValueError unless side is "left" or "right".
    """
    if side not in ("left", "right"):
        raise ValueError("Side of padding must be 'left' or 'right'")


def _has_len(obj):
    """
    Tell whether len() can be applied to obj.
    """
    try:
        len(obj)
    except TypeError:
        return False
    return True


def _fit_list(seq, side, max_len):
    """
    Clip seq to max_len items, or extend it with zeros on the given side.

    Padding uses list concatenation, so seq is expected to be a Python list
    whenever it is shorter than max_len.
    """
    if len(seq) > max_len:
        return seq[:max_len]
    if len(seq) < max_len:
        zeros = [0] * (max_len - len(seq))
        if side == "left":
            return zeros + seq
        return seq + zeros
    return seq


def _pad_2d(subset, side, length):
    """
    Pad a data set whose inputs are flat sequences of numbers.

    Both x and y are clipped / zero-padded to the common length. A target
    without a length (e.g. a scalar class label) is passed through
    unchanged, matching what _pad_3d does for non-list targets.
    """
    _check_side(side)
    max_len = max([len(x) for x, _ in subset]) if length == -1 else length
    new_set = []
    for x, y in subset:
        if _has_len(y):
            y = _fit_list(y, side, max_len)
        new_set.append((_fit_list(x, side, max_len), y))
    return new_set


def _pad_3d(subset, side, length):
    """
    Pad a data set whose inputs are sequences of row vectors.

    Inputs are clipped to the common length or extended with all-zero rows
    of dtype FLOATX; list targets are clipped / zero-padded the same way,
    and any other target is passed through unchanged.
    """
    # Validate once up front so a bad `side` can never be silently ignored
    # (and is raised, not returned).
    _check_side(side)
    # Width of the zero rows is taken from the first row of the first sample.
    row_size = subset[0][0][0].shape[0]
    max_len = max([len(x) for x, _ in subset]) if length == -1 else length
    new_set = []
    for x, y in subset:
        if isinstance(y, list):
            # Clip or pad the target vector
            y = _fit_list(y, side, max_len)
        if len(x) > max_len:
            x = x[:max_len]
        elif len(x) < max_len:
            pad_length = max_len - len(x)
            pad_matrix = np.zeros((pad_length, row_size), dtype=FLOATX)
            if side == "left":
                x = np.vstack([pad_matrix, x])
            else:
                x = np.vstack([x, pad_matrix])
        new_set.append((x, y))
    return new_set