TensorFlowTTS: Cannot train MFA-aligned FastSpeech2 with gradient accumulator: ValueError: None values not supported.

I tried training FastSpeech2 on LJSpeech resampled to 24 kHz, with gradient_accumulation_steps: 1 and a batch size of 128, using mixed precision on a Tesla T4 (14 GB of VRAM), and got this:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Traceback (most recent call last):
  File "/content/TensorflowTTS/ttsexamples/fastspeech2/train_fastspeech2.py", line 436, in <module>
    main()
  File "/content/TensorflowTTS/ttsexamples/fastspeech2/train_fastspeech2.py", line 428, in main
    resume=args.resume,
  File "/content/TensorflowTTS/tensorflow_tts/trainers/base_trainer.py", line 1002, in fit
    self.run()
  File "/content/TensorflowTTS/tensorflow_tts/trainers/base_trainer.py", line 103, in run
    self._train_epoch()
  File "/content/TensorflowTTS/tensorflow_tts/trainers/base_trainer.py", line 125, in _train_epoch
    self._train_step(batch)
  File "/content/TensorflowTTS/tensorflow_tts/trainers/base_trainer.py", line 780, in _train_step
    self.one_step_forward(batch)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
    result = self._call(*args, **kwds)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 823, in _call
    self._initialize(args, kwds, add_initializers_to=initializers)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 697, in _initialize
    *args, **kwds))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 2855, in _get_concrete_function_internal_garbage_collected
    graph_function, _, _ = self._maybe_define_function(args, kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3213, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3075, in _create_graph_function
    capture_by_value=self._capture_by_value),
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py", line 986, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 600, in wrapped_fn
    return weak_wrapped_fn().__wrapped__(*args, **kwds)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py", line 973, in wrapper
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    /content/TensorflowTTS/tensorflow_tts/trainers/base_trainer.py:788 _one_step_forward  *
        per_replica_losses = self._strategy.run(
    /content/TensorflowTTS/tensorflow_tts/trainers/base_trainer.py:835 _one_step_forward_per_replica  *
        self._optimizer.apply_gradients(
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py:380 apply_gradients  **
        args=(grads_and_vars, name, experimental_aggregate_gradients))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2715 merge_call
        return self._merge_call(merge_fn, args, kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2722 _merge_call
        return merge_fn(self._strategy, *args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py:410 _apply_gradients_cross_replica  **
        do_not_apply_fn)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/smart_cond.py:59 smart_cond
        name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py:507 new_func
        return func(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/control_flow_ops.py:1180 cond
        return cond_v2.cond_v2(pred, true_fn, false_fn, name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/cond_v2.py:85 cond_v2
        op_return_value=pred)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py:986 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py:396 apply_fn
        args=(grads, wrapped_vars, name, experimental_aggregate_gradients))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/one_device_strategy.py:367 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py:420 _apply_gradients
        experimental_aggregate_gradients=experimental_aggregate_gradients)
    /content/TensorflowTTS/tensorflow_tts/optimizers/adamweightdecay.py:124 apply_gradients
        (grads, _) = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/clip_ops.py:352 clip_by_global_norm
        constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1124 binary_op_wrapper
        return func(x, y, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1296 truediv
        return _truediv_python3(x, y, name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1222 _truediv_python3
        y = ops.convert_to_tensor(y, dtype_hint=x.dtype.base_dtype, name="y")
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1499 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:338 _constant_tensor_conversion_function
        return constant(v, dtype=dtype, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:264 constant
        allow_broadcast=True)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:282 _constant_impl
        allow_broadcast=allow_broadcast))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_util.py:444 make_tensor_proto
        raise ValueError("None values not supported.")

    ValueError: None values not supported.

[train]:   0% 0/150000 [01:14<?, ?it/s]
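
If I'm reading the traceback right, the None being rejected is the clip_norm value reaching tf.clip_by_global_norm in adamweightdecay.py (line 124 above), not a gradient; tf.clip_by_global_norm ignores None entries in the gradient list. A minimal sketch (my own repro, not the trainer code) that triggers the same error:

    import tensorflow as tf

    grads = [tf.constant([1.0, 2.0]), tf.constant([3.0])]

    # clip_norm=None follows the same path as the traceback above:
    # clip_ops.py evaluates constant(1.0, dtype=...) / clip_norm, and
    # converting None to a tensor raises
    # "ValueError: None values not supported."
    tf.clip_by_global_norm(grads, clip_norm=None)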

Any ideas?

About this issue

  • State: closed
  • Created 4 years ago
  • Comments: 15

Most upvoted comments

@dathudeptrai

Did you pull the newest code from master? The bug seems to come from the dataloader.

Now it works well.
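
Until you can update, a defensive guard around the failing call also avoids the crash. This is only a sketch (a hypothetical clip_grads helper, not the repository's actual fix, which per the comment above was in the dataloader):

    import tensorflow as tf

    def clip_grads(grads, clip_norm):
        # Hypothetical guard for the call that fails in adamweightdecay.py:
        # only clip when a clip norm is actually configured; a None
        # clip_norm leaves the gradients unchanged.
        if clip_norm is not None:
            (grads, _) = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
        return grads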