1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
|
|
from deepy.core.env import EPSILON |
5
|
|
|
from deepy.core import neural_computation |
6
|
|
|
|
7
|
|
|
import theano.tensor as T |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
@neural_computation
def cross_entropy(y, target_index, mask=None, after_softmax=False):
    """Categorical cross-entropy loss.

    Parameters:
        y: output tensor; 2-D (samples x classes) or 3-D
            (samples x time x classes). Treated as logits unless
            ``after_softmax`` is True or ``y`` is the direct output of a
            softmax op.
        target_index: integer tensor of gold class indices.
        mask: optional mask tensor; only used in the 3-D case.
        after_softmax: set True when ``y`` already holds probabilities.

    Returns:
        A scalar loss.
    """
    if y.ndim == 3:
        # Sequence output: delegate to the time-distributed variant.
        return cross_entropy_3d(y, target_index, mask, after_softmax=after_softmax)
    else:
        # FIX: y.owner is None when y is a graph input, not an op output;
        # the original dereferenced y.owner.op unconditionally and crashed.
        if y.owner is not None and str(y.owner.op).lower().startswith("softmax"):
            after_softmax = True
        if not after_softmax:
            y = T.nnet.softmax(y)
        # FIX: clip before log to avoid -inf, consistent with cross_entropy_3d.
        y = T.clip(y, EPSILON, 1.0 - EPSILON)
        return -T.mean(T.log(y)[T.arange(target_index.shape[0]), target_index])
20
|
|
|
|
21
|
|
|
@neural_computation
def cross_entropy_3d(y, target_index, mask=None, after_softmax=False):
    """Cross-entropy for 3-D (samples x time x classes) outputs.

    Parameters:
        y: 3-D output tensor, logits unless ``after_softmax`` is True or
            ``y`` is the direct output of a softmax op.
        target_index: 2-D integer tensor of gold class indices.
        mask: optional 2-D binary mask; masked-out steps contribute
            nothing to the loss and the mean is taken over unmasked steps.
        after_softmax: set True when ``y`` already holds probabilities.

    Returns:
        A scalar loss averaged over unmasked positions.
    """
    # FIX: guard y.owner, which is None for graph inputs.
    if y.owner is not None and str(y.owner.op).lower().startswith("softmax"):
        after_softmax = True
    # FIX: `if mask:` evaluated the truth value of a symbolic tensor,
    # which is undefined for Theano variables; compare with None instead.
    has_mask = mask is not None
    flat_mask = mask.flatten() if has_mask else 1

    # Softmax over the flattened (samples*time, classes) matrix.
    shape = y.shape
    y_2d = y.reshape((shape[0] * shape[1], shape[2]))
    if after_softmax:
        softmax_tensor = y_2d * (flat_mask[:, None] if has_mask else 1)
    else:
        if has_mask:
            # Subtract a large constant from masked-out logits so their
            # probabilities collapse before the softmax.
            penalties = 99. * (1 - flat_mask)
            y_2d -= penalties[:, None]
        softmax_tensor = T.nnet.softmax(y_2d)

    # Gather the probability assigned to each gold class by flattening
    # and computing row-major offsets into the probability matrix.
    result_vector = softmax_tensor.flatten()
    target_vector = target_index.flatten()
    target_index_vector = T.arange(target_vector.shape[0]) * shape[-1] + target_vector

    prob_vector = result_vector[target_index_vector]
    # Clip to avoid log(0).
    prob_vector = T.clip(prob_vector, EPSILON, 1.0 - EPSILON)
    log_prob_vector = - T.log(prob_vector) * flat_mask
    # Normalize by the number of unmasked steps (or element count if unmasked).
    cost = T.sum(log_prob_vector) / T.sum(flat_mask)
    return cost
48
|
|
|
|
49
|
|
|
@neural_computation
def least_squares(y, target):
    """Halved mean squared error between ``y`` and ``target``.

    The squared differences are summed over the last axis of ``target``
    and averaged over the remaining axes, then divided by two.
    """
    difference = y - target
    per_sample = (difference * difference).sum(axis=target.ndim - 1)
    return T.mean(per_sample) / 2
53
|
|
|
|
54
|
|
|
@neural_computation
def accuracy(y, target_index, mask=None):
    """Fraction of predictions in ``y`` matching ``target_index``.

    Parameters:
        y: tensor of predicted class indices.
        target_index: tensor of gold class indices.
        mask: optional binary mask; masked-out positions never count as
            hits and the denominator is the number of unmasked positions.

    Returns:
        A scalar accuracy.
    """
    # FIX: `if mask:` evaluated the truth value of a symbolic tensor,
    # which is undefined for Theano variables; compare with None instead.
    if mask is not None:
        # Remap masked-out targets to -1 so they can never equal a prediction.
        target_index = target_index * mask - (1 - mask)
        hits = T.eq(y, target_index)
        return T.sum(hits) / T.sum(mask)
    else:
        hits = T.eq(y, target_index)
        return T.mean(hits)
63
|
|
|
|
64
|
|
|
@neural_computation
def error_rate(y, target_index, mask=None):
    """Return ``1 - accuracy``.

    Parameters:
        y: tensor of predicted class indices.
        target_index: tensor of gold class indices.
        mask: optional binary mask, forwarded to :func:`accuracy`
            (new backward-compatible parameter; defaults to None, which
            preserves the original behavior).
    """
    return 1. - accuracy(y, target_index, mask)
67
|
|
|
|
68
|
|
|
|
69
|
|
|
|