tensorflow: Tensorflow 2.0.0-rc0 - incorrect data shape for SparseCategoricalCrossentropy

Hi,

I’m trying to train a classifier using tensorflow.keras that predicts categorical labels (0/1/2). Input data is an ndarray of tf.float32. I used tensorflow=2.0.0-beta0 and then tensorflow=2.0.0-rc0 to produce the error messages below.

The output seems to be in the correct form as per SparseCategoricalCrossentropy :
Input : [7 1]
Output: [7 3]
Where batch size is 7.

tensorflow=2.0.0-rc0:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-5-fe1cc3e1dca1> in <module>
     47 print(model.summary())
     48 
---> 49 model.fit(train.batch(BATCH_SIZE), epochs=EPOCHS, verbose=2)
     50 model.evaluate(train, steps=None, verbose=1)

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    732         max_queue_size=max_queue_size,
    733         workers=workers,
--> 734         use_multiprocessing=use_multiprocessing)
    735 
    736   def evaluate(self,

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    437         # Lifting succeeded, so variables are initialized and we can run the
    438         # stateless function.
--> 439         return self._stateless_fn(*args, **kwds)
    440     else:
    441       canon_args, canon_kwds = \

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1820     """Calls a graph function specialized to the inputs."""
   1821     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1822     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1823 
   1824   @property

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

/usr/local/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     65     else:
     66       message = e.message
---> 67     six.raise_from(core._status_to_exception(e.code, message), None)
     68   except TypeError as e:
     69     keras_symbolic_tensors = [

~/Library/Python/3.7/lib/python/site-packages/six.py in raise_from(value, from_value)

InvalidArgumentError:  assertion failed: [] [Condition x == y did not hold element-wise:] [x (loss/dense_3_loss/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [7 1] [y (loss/dense_3_loss/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [7 3]
	 [[node loss/dense_3_loss/SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert (defined at /usr/local/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_2031]

Function call stack:
distributed_function

With tensorflow=2.0.0-beta0:

InvalidArgumentError:  logits and labels must have the same first dimension, got logits shape [7,3] and labels shape [7]
	 [[node loss/dense_2_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-3-4fa88a5fad5e>:70) ]] [Op:__inference_keras_scratch_graph_5038]

code to reproduce:

import numpy as np
import pandas as pd
import random
import tensorflow as tf
import time as tm

# Per-sample feature shape as [rows, cols]; data_gen indexes it as
# in_shape[0] and in_shape[1].
INPUT_SHAPE=[3, 5]
# Number of samples the generator yields per pass over the dataset.
NUM_POINTS=20
# Batch size applied with Dataset.batch() before fitting.
BATCH_SIZE=7
# Number of passes over the data given to model.fit().
EPOCHS=4

def data_gen(num, in_shape):
    """Yield ``num`` random ``(features, label)`` samples, one at a time.

    Args:
        num: how many samples to produce.
        in_shape: indexable holding the per-sample shape; only the first
            two entries (rows, cols) are used.

    Yields:
        A tuple ``(features, label)`` where ``features`` is a uniform-random
        array of shape ``(in_shape[0], in_shape[1])`` and ``label`` is an
        integer class id drawn from 0, 1 or 2 (both ends inclusive).
    """
    for _ in range(num):
        n_rows = in_shape[0]
        n_cols = in_shape[1]
        features = np.random.rand(n_rows, n_cols)
        # Sparse (integer) label, not one-hot.
        label = random.randint(0, 2)
        yield features, label
        
def data_gen_all(num, in_shape, num_labels):
    """Build an in-memory dataset of random inputs with one-hot labels.

    Args:
        num: number of samples to generate.
        in_shape: per-sample shape as a two-element sequence ``[rows, cols]``.
        num_labels: number of classes; also the width of each one-hot row.

    Returns:
        A tuple ``(x, y)`` of float arrays. ``x`` has shape
        ``(num, rows, cols)`` filled with uniform-random values and ``y`` has
        shape ``(num, num_labels)`` where every row contains exactly one 1.

    Raises:
        ValueError: if ``num_labels`` is not positive and ``num`` > 0
            (raised by ``random.randrange``).
    """
    x = np.zeros([num] + list(in_shape))
    y = np.zeros([num, num_labels])
    for i in range(num):
        x[i, :, :] = np.random.rand(in_shape[0], in_shape[1])
        # random.randint() includes its upper bound, so randint(0, num_labels)
        # could select the non-existent class `num_labels`, which one-hot
        # encodes as an all-zero row. randrange() excludes the upper bound.
        label = random.randrange(num_labels)
        # Write the one-hot row directly; no TensorFlow round trip needed.
        y[i, label] = 1.0
    return x, y

# Wrap data_gen in a tf.data pipeline that yields (features, label) pairs.
train = tf.data.Dataset.from_generator(
    generator=data_gen,
    # Features are declared float32 and labels int32.
    output_types=(tf.float32, tf.int32),
#     output_shapes=(tf.TensorShape([None, INPUT_SHAPE[1]]), tf.TensorShape(None)),
#     output_shapes=(tf.TensorShape(INPUT_SHAPE), tf.TensorShape(())),
    # Features: rank 2 with an unknown first dimension and INPUT_SHAPE[1]
    # columns; labels: scalar (shape ()).
    output_shapes=([None, INPUT_SHAPE[1]],()),
    # NOTE: the parentheses do not create a tuple; this is the plain list
    # [NUM_POINTS, INPUT_SHAPE]. Presumably its two elements are forwarded
    # to data_gen as (num, in_shape) -- confirm against from_generator docs.
    args=([NUM_POINTS, INPUT_SHAPE])
)

def create_model(input_shape):
    """Build the two-layer dense softmax classifier used in this repro.

    Args:
        input_shape: per-sample input shape (without the batch dimension),
            passed to the first Dense layer.

    Returns:
        An uncompiled ``tf.keras`` Sequential model: Dense(100, tanh)
        followed by Dense(3, softmax) with an L2 kernel regularizer.

    NOTE(review): Dense layers transform only the last axis of their input.
    With a rank-2 per-sample shape such as [3, 5], the per-sample output is
    presumably (3, 3) rather than (3,), which would explain a labels/logits
    shape mismatch in a sparse cross-entropy loss. Inserting a Flatten layer
    before the first Dense is the usual remedy -- confirm. The layers are
    intentionally left unchanged here so the script still reproduces the
    reported behaviour.
    """
    hidden_layer = tf.keras.layers.Dense(
        100,
        activation="tanh",
        input_shape=input_shape,
    )
    output_layer = tf.keras.layers.Dense(
        3,
        activation="softmax",
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    )
    return tf.keras.models.Sequential([hidden_layer, output_layer])

# Build the classifier for per-sample inputs of shape INPUT_SHAPE.
model = create_model(input_shape=INPUT_SHAPE)

# Sparse categorical cross-entropy is used because data_gen yields integer
# class ids; the commented-out alternative is for one-hot labels.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4, clipvalue=1.0),
    loss= tf.keras.losses.SparseCategoricalCrossentropy(),
#     loss= tf.keras.losses.CategoricalCrossentropy()
    )
# NOTE(review): model.summary() presumably prints the table itself and
# returns None, so wrapping it in print() would emit an extra "None" -- confirm.
print(model.summary())
# Training consumes the dataset in batches of BATCH_SIZE ...
model.fit(train.batch(BATCH_SIZE), epochs=EPOCHS, verbose=2)
# ... whereas evaluation is handed the unbatched dataset.
# NOTE(review): this differs from the fit() call above; evaluating on
# train.batch(BATCH_SIZE) is probably what was intended -- confirm.
model.evaluate(train, steps=None, verbose=1)

### CategoricalCrossentropy
# Second experiment: fit on in-memory arrays with one-hot labels.
# NOTE(review): the model above is compiled with
# SparseCategoricalCrossentropy; one-hot targets presumably require
# recompiling with the commented-out CategoricalCrossentropy loss first --
# confirm.
x,y = data_gen_all(num=20, in_shape=INPUT_SHAPE, num_labels=3)
# Shape check of the generated inputs: (num, rows, cols).
print(x.shape)
model.fit(x=x, y=y, epochs=EPOCHS, verbose=2)

https://stackoverflow.com/questions/57842734/tensorflow-2-0-incrrect-data-shape-for-sparsecategoricalcrossentropy

Platform: MacOS 10.14.6
tensorflow=2.0.0-rc0
tf_env_collect.sh produced:

== check python ===================================================
python version: 3.7.4
python branch: 
python build version: ('default', 'Jul  9 2019 18:13:23')
python compiler version: Clang 10.0.1 (clang-1001.0.46.4)
python implementation: CPython


== check os platform ===============================================
os: Darwin
os kernel version: Darwin Kernel Version 18.7.0: Tue Aug 20 16:57:14 PDT 2019; root:xnu-4903.271.2~2/RELEASE_X86_64
os release version: 18.7.0
os platform: Darwin-18.7.0-x86_64-i386-64bit
linux distribution: ('', '', '')
linux os distribution: ('', '', '')
mac version: ('10.14.6', ('', '', ''), 'x86_64')
uname: uname_result(system='Darwin', node='chnb.local', release='18.7.0', version='Darwin Kernel Version 18.7.0: Tue Aug 20 16:57:14 PDT 2019; root:xnu-4903.271.2~2/RELEASE_X86_64', machine='x86_64', processor='i386')
architecture: ('64bit', '')
machine: x86_64


== are we in docker =============================================
No

== compiler =====================================================
Apple LLVM version 10.0.1 (clang-1001.0.46.4)
Target: x86_64-apple-darwin18.7.0
Thread model: posix
InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin

== check pips ===================================================
numpy                1.17.2              
protobuf             3.9.1               
tensorflow           2.0.0b0             

== check for virtualenv =========================================
False

== tensorflow import ============================================
tf.version.VERSION = 2.0.0-beta0
tf.version.GIT_VERSION = v1.12.1-3259-gf59745a381
tf.version.COMPILER_VERSION = 4.2.1 Compatible Apple LLVM 10.0.0 (clang-1000.11.45.5)
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/usr/local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/usr/local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/usr/local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/usr/local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/usr/local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/usr/local/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])

== env ==========================================================
LD_LIBRARY_PATH is unset
DYLD_LIBRARY_PATH is unset

== nvidia-smi ===================================================
./collect.sh: line 147: nvidia-smi: command not found

== cuda libs  ===================================================

== tensorflow installed from info ==================
Name: tensorflow
Version: 2.0.0b0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.7/site-packages
Required-by: 

== python version  ==============================================
(major, minor, micro, releaselevel, serial)
(2, 7, 16, 'final', 0)

== bazel version  ===============================================
Build label: 0.29.0-homebrew
Build time: Wed Aug 28 18:07:41 2019 (1567015661)
Build timestamp: 1567015661
Build timestamp as int: 1567015661

About this issue

  • Original URL
  • State: closed
  • Created 5 years ago
  • Comments: 15 (3 by maintainers)

Most upvoted comments

I’m facing the same error message but only when I add callbacks.
Adding any one of them, separately or together, triggers the issue; without them the code runs smoothly.

# Small convolutional network; X, Y and the Keras names used here are
# defined outside this snippet.
model = Sequential()
# Input shape is taken from the data, dropping the leading (sample) axis.
model.add(Convolution2D(4, (1,3), input_shape=X.shape[1:]))
##model.add(Convolution2D(8, (3,3)))
model.add(Convolution2D(2, (3,3), dilation_rate=(1,1)))
model.add(Convolution2D(1, (3,3), dilation_rate=(4,4)))
model.add(Flatten())
# Three sigmoid outputs trained against a mean-squared-error loss.
model.add(Dense(3, activation="sigmoid"))
opt = SGD(learning_rate=0.013)
model.compile(optimizer=opt,
              loss="MSE",
              metrics=["accuracy"])

# *** These Callbacks
# All three callbacks monitor 'val_loss' and treat lower values as better
# (mode='min'); fit() below supplies validation data via validation_split.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10,
                           verbose=True,
                           mode='min')
# '\\MODELS' is the literal path \MODELS (one backslash after escaping).
chckpnt_save = ModelCheckpoint('\\MODELS',
                              save_best_only=True,
                              monitor='val_loss',
                              mode='min')
# NOTE(review): `epsilon` looks like the older name of the `min_delta`
# argument of ReduceLROnPlateau -- confirm against the installed version.
rdc_lr_loss = ReduceLROnPlateau(monitor='val_loss',
                                factor=0.1,
                                patience=7,
                                verbose=True,
                                epsilon=1e-4,
                                mode='min')
# ***

# Train with 17.5% of the data held out for validation and all three
# callbacks attached.
model.fit(X,Y,epochs=5000,
          batch_size=3,
          validation_split=0.175,
          callbacks=[early_stop, chckpnt_save , rdc_lr_loss ])

Error Message:

  File "Whatsapp_Organyzer_B.py", line 52, in <module>
    callbacks=[early_stop])
  File "C:\Python37\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
    use_multiprocessing=use_multiprocessing)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 342, in fit
    total_epochs=epochs)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 128, in run_one_epoch
    batch_outs = execution_function(iterator)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 98, in execution_function
    distributed_function(input_fn))
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 568, in __call__
    result = self._call(*args, **kwds)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 632, in _call
    return self._stateless_fn(*args, **kwds)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\function.py", line 2363, in __call__
    return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\function.py", line 1611, in _filtered_call
    self.captured_inputs)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\function.py", line 1692, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\function.py", line 545, in call
    ctx=ctx)
  File "C:\Python37\lib\site-packages\tensorflow_core\python\eager\execute.py", line 67, in quick_execute
    six.raise_from(core._status_to_exception(e.code, message), None)
  File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
         [[node sequential/conv2d/Conv2D (defined at Whatsapp_Organyzer_B.py:52) ]] [Op:__inference_distributed_function_705]

Function call stack:
distributed_function

@super-pirata Getting the same Error even without callbacks

@jvishnuvardhan Thanks for the update.
To reproduce, you need to comment out the line that adds the LSTM layer to the model: tf.keras.layers.LSTM(1, activation="tanh"),

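Dataset: 6000
Descriptions: train=6000
Photos: train=6000
Vocabulary Size: 7579
Description Length: 34

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to
==================================================================================================
input_14 (InputLayer)           [(None, 34)]         0
__________________________________________________________________________________________________
input_13 (InputLayer)           [(None, 4096)]       0
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 34, 256)      1940224     input_14[0][0]
__________________________________________________________________________________________________
dropout_12 (Dropout)            (None, 4096)         0           input_13[0][0]
__________________________________________________________________________________________________
dropout_13 (Dropout)            (None, 34, 256)      0           embedding_6[0][0]
__________________________________________________________________________________________________
dense_18 (Dense)                (None, 256)          1048832     dropout_12[0][0]
__________________________________________________________________________________________________
lstm_6 (LSTM)                   (None, 256)          525312      dropout_13[0][0]
__________________________________________________________________________________________________
add_6 (Add)                     (None, 256)          0           dense_18[0][0]
                                                                 lstm_6[0][0]
__________________________________________________________________________________________________
dense_19 (Dense)                (None, 256)          65792       add_6[0][0]
__________________________________________________________________________________________________
dense_20 (Dense)                (None, 7579)         1947803     dense_19[0][0]
==================================================================================================
Total params: 5,527,963
Trainable params: 5,527,963
Non-trainable params: 0
__________________________________________________________________________________________________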



ValueError                                Traceback (most recent call last)
<ipython-input-26-611f9002c85b> in <module>()
    168     generator = data_generator(train_descriptions, train_features, tokenizer, max_length, vocab_size)
    169     # fit for one epoch
--> 170     model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    171     # save model
    172     model.save('model_' + str(i) + '.h5')

9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    975           except Exception as e:  # pylint:disable=broad-except
    976             if hasattr(e, "ag_error_metadata"):
--> 977               raise e.ag_error_metadata.to_exception(e)
    978             else:
    979               raise

ValueError: in user code:

/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
    return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
    outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
    return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
    return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
    return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
    outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:754 train_step
    y_pred = self(x, training=True)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
    input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/input_spec.py:207 assert_input_compatibility
    ' input tensors. Inputs received: ' + str(inputs))

ValueError: Layer model_6 expects 2 input(s), but it received 3 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, None) dtype=int32>, <tf.Tensor 'IteratorGetNext:2' shape=(None, None) dtype=float32>]