import tensorflow as tf

from .math import gram_matrix


class NSTCostComputer:
    @classmethod
    def compute(cls, J_content: float, J_style: float, alpha: float = 10, beta: float = 40) -> float:
        """Compute the total cost function.

        Computes the total cost (aka learning error) as a linear combination of
        the 'content cost' and the 'style cost'.

        Total cost = alpha * J_content + beta * J_style

        Or, mathematically expressed:

        J(G) = alpha * J_content(C, G) + beta * J_style(S, G)

        where G: Generated Image, C: Content Image, S: Style Image,
        and J, J_content, J_style are mathematical functions.

        Args:
            J_content (float): content cost
            J_style (float): style cost
            alpha (float, optional): hyperparameter weighting the content cost. Defaults to 10.
            beta (float, optional): hyperparameter weighting the style cost. Defaults to 40.

        Returns:
            float: the total cost as defined by the formula above
        """
        return alpha * J_content + beta * J_style


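# Illustrative usage sketch (not part of the module above): with the default
# weights alpha=10 and beta=40, a content cost of 0.5 and a style cost of 0.2
# combine into 10 * 0.5 + 40 * 0.2 = 13.0. The function below is a hypothetical
# helper added only for demonstration.
def _example_total_cost():
    """Minimal, hypothetical example of NSTCostComputer.compute."""
    return NSTCostComputer.compute(J_content=0.5, J_style=0.2)  # == 13.0

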
class NSTContentCostComputer:
    @classmethod
    def compute(cls, a_C, a_G):
        r"""
        Computes the content cost.

        Assumption 1: a layer l has been chosen from a (Deep) Neural Network
        trained on images, that should act as a content model.

        Then:
        1. a_C (3D volume) are the hidden layer activations in the chosen layer (l), when the C
           image is forward propagated (passed through) in the network.

        2. a_G (3D volume) are the hidden layer activations in the chosen layer (l), when the G
           image is forward propagated (passed through) in the network.

        3. The above activations are an n_H x n_W x n_C tensor,
           OR Height x Width x Number_of_Channels.

        Pseudocode for the LaTeX expression of the mathematical equation:

        J_content(C, G) = \frac{1}{4 * n_H * n_W * n_C} \sum_{all entries} (a^{(C)} - a^{(G)})^2
        OR
        J_content(C, G) = sum_{for all entries} (a^{(C)} - a^{(G)})^2 / (4 * n_H * n_W * n_C)

        Note that n_H * n_W * n_C is part of the normalization term.

        Args:
            a_C (tensor): of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image C
            a_G (tensor): of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image G

        Returns:
            (tensor): 1D with 1 scalar value computed using the equation above
        """
        # Retrieve dimensions from a_G
        m, n_H, n_W, n_C = a_G.get_shape().as_list()

        # Optionally, a_C and a_G could be unrolled first:
        # a_C_unrolled = tf.reshape(a_C, [m, n_H * n_W, n_C])
        # a_G_unrolled = tf.reshape(a_G, [m, n_H * n_W, n_C])

        # Compute the cost
        J_content = tf.reduce_sum(tf.square(a_C - a_G)) / (4 * n_H * n_W * n_C)
        return J_content


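# Illustrative usage sketch (an assumption, not part of the module above): the
# activation volumes are assumed to be TF tensors of shape (1, n_H, n_W, n_C),
# e.g. taken from one chosen layer of a pretrained convnet. Random tensors are
# used here only to show the expected shapes; `_example_content_cost` is a
# hypothetical helper added for demonstration.
def _example_content_cost():
    """Minimal, hypothetical example of NSTContentCostComputer.compute."""
    a_C = tf.random.normal([1, 4, 4, 3])  # stand-in activations for the content image C
    a_G = tf.random.normal([1, 4, 4, 3])  # stand-in activations for the generated image G
    return NSTContentCostComputer.compute(a_C, a_G)  # non-negative scalar tensor

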
class GramMatrixComputer(type):
    """Metaclass that attaches the shared `gram_matrix` function to a class as `compute_gram`."""
    def __new__(mcs, *args, **kwargs):
        class_object = super().__new__(mcs, *args, **kwargs)
        class_object.compute_gram = gram_matrix
        return class_object


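# The concrete `gram_matrix` implementation lives in the package's .math module
# and is not shown in this file. As a hedged reference, in neural style transfer
# the gram ("style") matrix of an unrolled (n_C, n_H * n_W) activation matrix A
# is usually G = A @ A^T; the helper below is a hypothetical stand-in sketching
# that assumption, not the actual .math.gram_matrix.
def _gram_matrix_sketch(A):
    """Hypothetical stand-in: G[i, j] = dot product of channel rows i and j."""
    return tf.matmul(A, tf.transpose(A))

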
class NSTLayerStyleCostComputer(metaclass=GramMatrixComputer):

    @classmethod
    def compute(cls, a_S, a_G):
        r"""
        Compute the style cost, using the activations of the chosen style layer l.

        Mathematical equation written in LaTeX code:
        J^{[l]}_style (S, G) = \frac{1}{4 * n_C^2 * (n_H * n_W)^2} \sum^{n_C}_{i=1} \sum^{n_C}_{j=1} (G^{(S)}_{(gram)i,j} - G^{(G)}_{(gram)i,j})^2

        OR

        Cost(S, G) = \sum^{n_C}_{i=1} \sum^{n_C}_{j=1} (G^{(S)}_{(gram)i,j} - G^{(G)}_{(gram)i,j})^2 / (4 * n_C^2 * (n_H * n_W)^2)

        Args:
            a_S (tensor): tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
            a_G (tensor): tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G

        Returns:
            (tensor): J_style_layer tensor representing a scalar value, the style cost defined by the equation above
        """
        # Retrieve dimensions from a_G
        m, n_H, n_W, n_C = a_G.get_shape().as_list()

        # Reshape the activation volumes to shape (n_C, n_H * n_W)
        a_S = tf.transpose(tf.reshape(a_S, [n_H * n_W, n_C]))
        a_G = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

        # Compute the gram matrices for both images S and G
        GS = cls.compute_gram(a_S)
        GG = cls.compute_gram(a_G)

        # Compute the loss
        J_style_layer = tf.reduce_sum(tf.square(GS - GG)) / (4 * n_C**2 * (n_H * n_W)**2)

        return J_style_layer


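# Illustrative usage sketch (an assumption, not part of the module above): both
# activation volumes are assumed to come from the same style layer and to have
# shape (1, n_H, n_W, n_C). Random tensors are used only to show the expected
# shapes; `_example_layer_style_cost` is a hypothetical helper added for
# demonstration.
def _example_layer_style_cost():
    """Minimal, hypothetical example of NSTLayerStyleCostComputer.compute."""
    a_S = tf.random.normal([1, 4, 4, 3])  # stand-in activations for the style image S
    a_G = tf.random.normal([1, 4, 4, 3])  # stand-in activations for the generated image G
    return NSTLayerStyleCostComputer.compute(a_S, a_G)  # non-negative scalar tensor

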
class NSTStyleCostComputer:
    style_layer_cost = NSTLayerStyleCostComputer.compute

    @classmethod
    def compute(cls, tf_session, model_layers):
        """
        Computes the overall style cost from several chosen layers.

        Args:
            tf_session (tf.compat.v1.InteractiveSession): the active interactive tf session
            model_layers (iterable): pairs of (style_layer_id, nst_style_layer) drawn from our
                image model (probably pretrained on a large dataset); each nst_style_layer
                exposes the layer's output tensor (`neurons`) and the `coefficient` weighting
                its contribution to the overall style cost

        Returns:
            (tensor): J_style - tensor representing a scalar value, the overall style cost defined above
        """
        # initialize the overall style cost
        J_style = 0

        # for layer_name, coeff in STYLE_LAYERS:
        for style_layer_id, nst_style_layer in model_layers:

            # Select the output tensor of the currently selected layer
            out = nst_style_layer.neurons
            # out = model[layer_name]

            # Set a_S to be the hidden layer activation from the layer we have selected,
            # by running the session on out
            a_S = tf_session.run(out)

            # Set a_G to be the hidden layer activation from the same layer. Here, a_G references
            # model[layer_name] and isn't evaluated yet. Later in the code, we'll assign the image G
            # as the model input, so that when we run the session, these will be the activations
            # drawn from the appropriate layer, with G as input.
            a_G = out

            # Compute the style cost for the current layer
            J_style_layer = cls.style_layer_cost(a_S, a_G)

            # Add coeff * J_style_layer of this layer to the overall style cost
            J_style += nst_style_layer.coefficient * J_style_layer

        return J_style


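# End-to-end usage sketch, under assumptions not defined in this file: it presumes
# TF1-style graph execution (tf.compat.v1), as suggested by the tf_session argument
# above, and a `model_layers` iterable of (layer_id, style_layer) pairs where each
# style_layer exposes a `neurons` output tensor and a `coefficient` weight. The
# `_FakeStyleLayer` namedtuple below is a hypothetical stand-in for whatever the
# real image-model wrapper provides; real activations would come from a pretrained
# convnet fed with the content, style and generated images.
def _example_total_nst_cost():
    """Hypothetical sketch chaining the content, style and total cost computers."""
    from collections import namedtuple

    _FakeStyleLayer = namedtuple('_FakeStyleLayer', ['neurons', 'coefficient'])

    # Graph mode is required for the tf_session.run() call inside
    # NSTStyleCostComputer.compute; disable eager execution before creating ops.
    tf.compat.v1.disable_eager_execution()
    session = tf.compat.v1.InteractiveSession()

    # Stand-in activation volumes of shape (1, n_H, n_W, n_C).
    a_C = tf.compat.v1.random_normal([1, 4, 4, 3])
    a_G = tf.compat.v1.random_normal([1, 4, 4, 3])
    style_layer = _FakeStyleLayer(
        neurons=tf.compat.v1.random_normal([1, 4, 4, 3]),
        coefficient=1.0,
    )

    J_content = NSTContentCostComputer.compute(a_C, a_G)
    J_style = NSTStyleCostComputer.compute(session, [('conv_1', style_layer)])
    J_total = NSTCostComputer.compute(J_content, J_style)

    result = session.run(J_total)
    session.close()
    return result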