TensorFlowTTS: RuntimeError when running inference from TFLite for FastSpeech2
Hi. I converted a FastSpeech2 model to TFLite, and when I try to run inference from the TFLite model I get this error:

```
decoder_output_tflite, mel_output_tflite = infer(input_text)
    interpreter.invoke()
  File "/home/zak/venv/lib/python3.8/site-packages/tensorflow/lite/python/interpreter.py", line 539, in invoke
    self._interpreter.Invoke()
RuntimeError: tensorflow/lite/kernels/reshape.cc:55 stretch_dim != -1 (0 != -1)
Node number 83 (RESHAPE) failed to prepare.
```
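The failing node is a RESHAPE, so here is a minimal way to inspect the tensors around it (a sketch against the same model file; `get_tensor_details()` works before allocation):

```python
# Sketch: dump every tensor in the graph to see which one feeds the
# failing RESHAPE (node 83) and what static shape it carries.
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='fastspeech2_quant.tflite')
for t in interpreter.get_tensor_details():
    print(t['index'], t['name'], t['shape'])
```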
The code I used for this is:
```python
import numpy as np
import yaml
import tensorflow as tf

from tensorflow_tts.processor import ZAKSpeechProcessor
from tensorflow_tts.processor.ZAKspeech import ZAKSPEECH_SYMBOLS

from tensorflow_tts.configs import FastSpeechConfig, FastSpeech2Config
from tensorflow_tts.configs import MultiBandMelGANGeneratorConfig

from tensorflow_tts.models import TFFastSpeech, TFFastSpeech2
from tensorflow_tts.models import TFMBMelGANGenerator

from IPython.display import Audio

# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path='fastspeech2_quant.tflite')

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Prepare input data.
def prepare_input(input_ids):
    input_ids = tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0)
    return (input_ids,
            tf.convert_to_tensor([0], tf.int32),
            tf.convert_to_tensor([1.0], dtype=tf.float32),
            tf.convert_to_tensor([1.0], dtype=tf.float32),
            tf.convert_to_tensor([1.0], dtype=tf.float32))

# Test the model on random input data.
def infer(input_text):
    for x in input_details:
        print(x)
    for x in output_details:
        print(x)
    processor = ZAKSpeechProcessor(data_dir=None,
                                   symbols=ZAKSPEECH_SYMBOLS,
                                   cleaner_names="arabic_cleaners")
    input_ids = processor.text_to_sequence(input_text.lower())
    interpreter.resize_tensor_input(input_details[0]['index'], [1, len(input_ids)])
    interpreter.resize_tensor_input(input_details[1]['index'], [1])
    interpreter.resize_tensor_input(input_details[2]['index'], [1])
    interpreter.resize_tensor_input(input_details[3]['index'], [1])
    interpreter.resize_tensor_input(input_details[4]['index'], [1])
    interpreter.allocate_tensors()
    input_data = prepare_input(input_ids)
    for i, detail in enumerate(input_details):
        input_shape = detail['shape']
        interpreter.set_tensor(detail['index'], input_data[i])

    interpreter.invoke()

    # The function get_tensor() returns a copy of the tensor data.
    # Use tensor() in order to get a pointer to the tensor.
    return (interpreter.get_tensor(output_details[0]['index']),
            interpreter.get_tensor(output_details[1]['index']))

# Initialize the MelGAN model.
with open('../examples/multiband_melgan/conf/multiband_melgan.v1.yaml') as f:
    mb_melgan_config = yaml.load(f, Loader=yaml.Loader)
mb_melgan_config = MultiBandMelGANGeneratorConfig(**mb_melgan_config["multiband_melgan_generator_params"])
mb_melgan = TFMBMelGANGenerator(config=mb_melgan_config, name='mb_melgan_generator')
mb_melgan._build()
mb_melgan.load_weights("../examples/multiband_melgan/exp/train.multiband_melgan.v1/checkpoints/generator-1000000.h5")

input_text = ""

decoder_output_tflite, mel_output_tflite = infer(input_text)
audio_before_tflite = mb_melgan(decoder_output_tflite)[0, :, 0]
audio_after_tflite = mb_melgan(mel_output_tflite)[0, :, 0]
```
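One thing I am not sure about: the loop above assumes `get_input_details()` returns the five inputs in the same order as the tuple from `prepare_input()`. If the converter reordered them, each input would receive the wrong tensor. A variant that matches by name instead (the name fragments below are guesses based on the usual FastSpeech2 signature and may not match a given exported model):

```python
# Sketch: assign inputs by (partial) tensor name instead of position.
# The keys are assumptions from the usual FastSpeech2 signature.
def set_inputs_by_name(interpreter, input_details, input_data):
    keys = ['input_ids', 'speaker_ids', 'speed_ratios', 'f0_ratios', 'energy_ratios']
    by_key = dict(zip(keys, input_data))
    for detail in input_details:
        for key, tensor in by_key.items():
            if key in detail['name']:
                interpreter.set_tensor(detail['index'], tensor)
                break

# usage: set_inputs_by_name(interpreter, input_details, prepare_input(input_ids))
```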
I'd appreciate your help.
About this issue
- State: closed
- Created 4 years ago
- Comments: 44 (5 by maintainers)
Removing the dropout calls (but leaving the embeddings) fixed this issue. The Flex delegate was no longer needed for inference or compilation, and inference appears to work correctly.
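For illustration, a minimal sketch (not the actual TensorFlowTTS layer code): the change amounts to gating dropout on `training`, so the inference trace handed to the TFLite converter never contains a dropout op or the dynamic shape computation it drags in.

```python
import tensorflow as tf

class FFTBlock(tf.keras.layers.Layer):  # hypothetical layer, for illustration only
    def __init__(self, units, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(units)
        self.dropout_rate = dropout_rate

    def call(self, x, training=False):
        x = self.dense(x)
        if training:  # dropout exists only in the training graph
            x = tf.nn.dropout(x, rate=self.dropout_rate)
        return x      # inference trace (training=False) is dropout-free
```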
@dathudeptrai The Flex delegate is required for FastSpeech2 TFLite because the model can't be exported with only TFLite built-in ops, and the delegate is needed to run the remaining regular TF ops (at least the last time I tried).
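For reference, this is roughly what the conversion looks like with Select TF (Flex) ops enabled. A sketch: it assumes `fastspeech2` is the built model and that it exposes `inference_tflite`, as in the TensorFlowTTS TFLite example; both names may differ in your setup.

```python
import tensorflow as tf

concrete_fn = fastspeech2.inference_tflite.get_concrete_function()
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_fn])
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,   # ops that have TFLite kernels
    tf.lite.OpsSet.SELECT_TF_OPS,     # fall back to TF kernels (Flex delegate)
]
tflite_model = converter.convert()
with open('fastspeech2_quant.tflite', 'wb') as f:
    f.write(tflite_model)
```

The Python `tensorflow` package ships the Flex kernels, so `tf.lite.Interpreter` can run such a model directly; mobile builds need the separate Select-TF-ops (Flex) library linked in.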