tensorflow: Resize layers in model producing LookupError when computing gradients
System information
- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Ubuntu 18.04
- TensorFlow version (use command below): 2.0
- Python version: 3.6.8
- CUDA/cuDNN version: 10.0/6.7.0
- GPU model and memory: GTX 1080 Ti
Describe the current behavior
Currently, I'm getting LookupError: gradient registry has no entry for ResizeBilinearGrad when computing tape.gradient(d_loss, D.trainable_variables) in the code below.
Describe the expected behavior
The code should run and update the parameters as usual.
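The failure seems to come from needing a second-order gradient through tf.image.resize: the gradient penalty in discriminator_step (below) calls tf.gradients inside the tape, so tape.gradient then has to differentiate through the ResizeBilinearGrad op, which has no registered gradient. Here is a minimal sketch of what I believe is the same failure mode (an illustration, not the original reproduction):

import tensorflow as tf

x = tf.Variable(tf.random.normal([1, 4, 4, 3]))

@tf.function
def second_order_through_resize():
    with tf.GradientTape() as outer_tape:
        with tf.GradientTape() as inner_tape:
            y = tf.reduce_sum(tf.image.resize(x, [8, 8]))  # bilinear by default
        grad = inner_tape.gradient(y, x)        # records a ResizeBilinearGrad op
        loss = tf.reduce_sum(tf.square(grad))   # loss depends on the first-order gradient
    # Differentiating loss requires the gradient of ResizeBilinearGrad,
    # which is what raises the LookupError.
    return outer_tape.gradient(loss, x)

second_order_through_resize()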
Code to reproduce the issue
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, ReLU, Lambda, MaxPool2D, BatchNormalization
from tensorflow.keras import Model, Sequential
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import argparse
import os
import glob
import sys
import time
import logging
sys.path.append('..')
########################
## MAKE YOUR NET HERE ##
########################
def conv_mean_pool(inputs, filters, kernel_size):
    INPUT = Input(shape=inputs.shape[1:])
    x = Conv2D(filters, kernel_size, padding='same')(INPUT)
    # Mean-pool the spatial dimensions by a factor of 2 (keep the batch dimension intact).
    OUTPUT = (x[:, ::2, ::2, :] + x[:, ::2, 1::2, :] + x[:, 1::2, ::2, :] + x[:, 1::2, 1::2, :]) / 4
    return Model(inputs=INPUT, outputs=OUTPUT)
def upsample_conv(inputs, filters, kernel_size, input_shape):
    INPUT = Input(shape=inputs.shape[1:])
    x = tf.concat([INPUT, INPUT, INPUT, INPUT], axis=-1)
    x = tf.nn.depth_to_space(x, 2)
    OUTPUT = Conv2D(filters, kernel_size, padding='same')(x)
    return Model(inputs=INPUT, outputs=OUTPUT)
def residual_block(name, inputs, filters, kernel_size, strides, input_shape, output_shape, upsample=False):
    INPUT = Input(shape=inputs.shape[1:])
    x = BatchNormalization()(INPUT)
    x = ReLU()(x)
    if upsample:
        x = upsample_conv(x, filters, kernel_size, input_shape)(x)
        x = Lambda(lambda z: tf.image.resize(z, size=output_shape, method='nearest'))(x)
        original_scaled = x
        x = BatchNormalization()(x)
        x = ReLU()(x)
        OUTPUT = Conv2D(filters, 3, padding='same')(x) + original_scaled
        return Model(inputs=INPUT, outputs=OUTPUT, name=name)
    else:
        x = Conv2D(filters, 3)(x)
        x = Lambda(lambda z: tf.image.resize(z, size=output_shape))(x)
        original_scaled = x
        x = BatchNormalization()(x)
        x = ReLU()(x)
        OUTPUT = conv_mean_pool(x, filters, kernel_size)(x)
        OUTPUT = Conv2D(filters, 3, padding='same')(x) + original_scaled
        return Model(inputs=INPUT, outputs=OUTPUT, name=name)
def create_discriminator(z_dim, name='Discriminator'):
    INPUT = Input(shape=[100, 100, 3])
    x = Lambda(lambda z: tf.image.resize(z, size=[128, 128]))(INPUT)
    x = Conv2D(3, 3, padding='same')(x)
    x = residual_block('res1', x, 32, 3, 1, [128, 128], [64, 64])(x)
    x = residual_block('res2', x, 32, 3, 1, [64, 64], [32, 32])(x)
    x = residual_block('res3', x, 32, 3, 1, [32, 32], [16, 16])(x)
    x = residual_block('res4', x, 32, 3, 1, [16, 16], [8, 8])(x)
    x = residual_block('res5', x, 32, 3, 1, [8, 8], [8, 8])(x)
    x = Flatten()(x)
    OUTPUT = Dense(1)(x)
    return Model(inputs=INPUT, outputs=OUTPUT)
def create_generator(z_dim, name='Generator'):
    INPUT = Input((z_dim,))
    x = Dense(z_dim*4*4)(INPUT)
    x = tf.reshape(x, (-1, 8, 8, 1))
    x = residual_block('res1', x, 64, 3, 1, [8, 8], [8, 8], upsample=True)(x)
    x = residual_block('res2', x, 32, 3, 1, [8, 8], [16, 16], upsample=True)(x)
    x = residual_block('res3', x, 16, 3, 1, [16, 16], [32, 32], upsample=True)(x)
    x = residual_block('res4', x, 8, 3, 1, [32, 32], [64, 64], upsample=True)(x)
    x = residual_block('res5', x, 4, 3, 1, [64, 64], [128, 128], upsample=True)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(3, 3, padding='same', activation='sigmoid')(x)
    OUTPUT = Lambda(lambda z: tf.image.resize(z, size=[100, 100]))(x)
    return Model(inputs=INPUT, outputs=OUTPUT, name=name)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Inputs')
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--z_dim', type=int, default=128)
    parser.add_argument('--n_critic', type=int, default=5)
    parser.add_argument('--LAMBDA', type=float, default=10)
    parser.add_argument('--shuffle', type=bool, default=True)
    parser.add_argument('--num_parallel_calls', type=int, default=4)
    parser.add_argument('--buffer_size', type=int, default=1000)
    parser.add_argument('--prefetch', type=int, default=1000)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--beta_1', type=float, default=0.0)
    parser.add_argument('--beta_2', type=float, default=0.999)
    args = parser.parse_args('')
    ##########################################################
    ## MAKING DATALOADERS FOR TRAINING, VALIDATION, TESTING ##
    ##########################################################
    train_data = tf.random.normal(shape=(1, 100, 100, 3))
    train_dataloader = tf.data.Dataset.from_tensor_slices(train_data).batch(1)
    #######################################
    ## INITIALIZE MODEL, LOSSES, METRICS ##
    #######################################
    G = create_generator(args.z_dim, 'Generator')
    D = create_discriminator(args.z_dim, 'Discriminator')
    fixed_noise = tf.random.normal(shape=(16, args.z_dim))
    loss_object = tf.keras.losses.BinaryCrossentropy()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    test_accuracy = tf.keras.metrics.Accuracy(name='test_accuracy')
    optimizerG = tf.keras.optimizers.Adam(
        learning_rate=args.learning_rate,
        beta_1=args.beta_1,
        beta_2=args.beta_2,
    )
    optimizerD = tf.keras.optimizers.Adam(
        learning_rate=args.learning_rate,
        beta_1=args.beta_1,
        beta_2=args.beta_2,
    )
    G.summary(print_fn=logging.info)
    D.summary(print_fn=logging.info)
    ##############################################
    ## Loss computations and training functions ##
    ##############################################
    @tf.function
    def discriminator_step(fake_image, real_image):
        with tf.GradientTape() as tape:
            epsilon = tf.random.uniform(
                shape=[fake_image.shape[0], 1, 1, 1], minval=0, maxval=1)
            interpolated_image = epsilon*fake_image + (1-epsilon)*real_image
            d_interpolated = D(interpolated_image)
            d_fake = D(fake_image)
            d_real = D(real_image)
            grad_d = tf.gradients(d_interpolated, [interpolated_image])[0]
            slopes = tf.sqrt(
                1e-8 + tf.reduce_sum(tf.square(grad_d), axis=[1, 2, 3]))
            gradient_penalty = tf.reduce_mean((slopes-1.) ** 2)
            d_loss = tf.reduce_mean(
                d_fake) - tf.reduce_mean(d_real) + args.LAMBDA * gradient_penalty
        gradients = tape.gradient(d_loss, D.trainable_variables)
        optimizerD.apply_gradients(
            zip(gradients, D.trainable_variables))
        return d_loss
    @tf.function
    def generator_step(z):
        # Generate fake_image inside the tape so that g_loss is connected
        # to G.trainable_variables.
        with tf.GradientTape() as tape:
            fake_image = G(z)
            d_fake = -D(fake_image)
            g_loss = tf.reduce_mean(d_fake)
        gradients = tape.gradient(g_loss, G.trainable_variables)
        optimizerG.apply_gradients(
            zip(gradients, G.trainable_variables))
        return g_loss
    ###################
    ## TRAINING LOOP ##
    ###################
    iter = 0
    d_loss = 0
    g_loss = 0
    for epoch in range(args.epochs):
        start = time.time()
        for real_image in train_dataloader:
            z = tf.random.normal(shape=(real_image.shape[0], args.z_dim))
            fake_image = G(z)
            d_loss = discriminator_step(fake_image, real_image)
            if iter % args.n_critic == 0:
                z = tf.random.normal(shape=(real_image.shape[0], args.z_dim))
                g_loss = generator_step(z)
            iter += 1
Other info / logs
Traceback (most recent call last):
File "code/WGAN-GP_issues.py", line 191, in <module>
d_loss = discriminator_step(fake_image, real_image)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py", line 457, in __call__
result = self._call(*args, **kwds)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py", line 503, in _call
self._initialize(args, kwds, add_initializers_to=initializer_map)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py", line 408, in _initialize
*args, **kwds))
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py", line 1848, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py", line 2150, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py", line 2041, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py", line 915, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py", line 358, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py", line 905, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.autograph.impl.api.StagingError: in converted code:
code/WGAN-GP_issues.py:162 discriminator_step *
gradients = tape.gradient(d_loss, D.trainable_variables)
/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/backprop.py:1014 gradient
unconnected_gradients=unconnected_gradients)
/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/imperative_grad.py:76 imperative_grad
compat.as_str(unconnected_gradients.value))
/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/backprop.py:134 _gradient_function
grad_fn = ops._gradient_registry.lookup(op_name) # pylint: disable=protected-access
/home/xiavatar/tf-2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/registry.py:97 lookup
"%s registry has no entry for: %s" % (self._name, name))
LookupError: gradient registry has no entry for: ResizeBilinearGrad
About this issue
- Original URL
- State: closed
- Created 5 years ago
- Reactions: 4
- Comments: 20 (13 by maintainers)
Has there been any progress on this issue? If someone has implemented a workaround or knows how to, could you please post it here?
The issue is that there is no gradient-of-gradient (second-order gradient) for the resize ops, which has been a known issue for a long time.
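Until that gradient is registered, one possible workaround (a sketch only, assuming the 2x scale factors used in residual_block above) is to express the resizes with ops whose gradients compose to higher order, e.g. depth_to_space for upsampling and average pooling for downsampling:

import tensorflow as tf

def upsample_2x(x):
    # Nearest-neighbour 2x upsampling built from concat + depth_to_space
    # (the same trick upsample_conv already uses), avoiding ResizeBilinear.
    return tf.nn.depth_to_space(tf.concat([x, x, x, x], axis=-1), 2)

def downsample_2x(x):
    # 2x downsampling via average pooling instead of a bilinear resize.
    return tf.nn.avg_pool2d(x, ksize=2, strides=2, padding='SAME')

Note that the initial 100x100 -> 128x128 resize in create_discriminator is not a 2x factor, so it would need a different treatment (e.g. padding the input or changing the input size).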