tensorflow: RuntimeError: tensorflow/lite/kernels/elementwise.cc:88 Type INT16 is unsupported by op Rsqrt. Node number 34 (RSQRT) failed to prepare. Failed to apply the default TensorFlow Lite delegate indexed at 0.
1. System information
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- TensorFlow installation (pip package or built from source):
- TensorFlow library (version, if pip package or github SHA, if built from source):
2. Code
!pip install git+https://github.com/openai/whisper.git
!pip install onnx
!pip install onnx_tf
!git clone https://github.com/usefulsensors/openai-whisper.git
!git clone https://github.com/openai/whisper.git
%%capture
!pip install optimum[onnxruntime] transformers git+https://github.com/openai/whisper.git
# -*- coding: utf-8 -*-
import warnings
warnings.filterwarnings("ignore")
from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
from transformers import (
set_seed,
AutoProcessor
)
from pathlib import Path
import os
SEED = 42
# Export vanilla & optimized onnx model
def export_vanilla_optimized_onnx(model_checkpoint):
    set_seed(SEED)
    processor = AutoProcessor.from_pretrained(model_checkpoint)
    # Vanilla
    model = ORTModelForSpeechSeq2Seq.from_pretrained(model_checkpoint, from_transformers=True, use_cache=True)
    onnx_path = Path(os.path.join("exported_onnx_models/", model_checkpoint))
    model.save_pretrained(onnx_path)
    processor.save_pretrained(onnx_path)
export_vanilla_optimized_onnx('openai/whisper-tiny')
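# Optional quick check (sketch, not in the original script): confirm the export above
# wrote the encoder/decoder ONNX files and the processor config. The path below just
# mirrors the os.path.join(...) used inside export_vanilla_optimized_onnx.
print(os.listdir("exported_onnx_models/openai/whisper-tiny"))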
import whisper
import torch
import tensorflow as tf
import onnx
import numpy as np
import argparse
import os
import warnings
import tqdm
from onnx_tf.backend import prepare
from whisper.audio import load_audio, log_mel_spectrogram, pad_or_trim, N_FRAMES, SAMPLE_RATE
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
# Load the OpenAI Whisper (PyTorch) "tiny" model
tiny_model = whisper.load_model("tiny")
# Export the encoder to ONNX format
torch.onnx.export(tiny_model.encoder, torch.randn(1, 80, 3000).to(device), "./whisper-encoder.onnx")
onnx_model_path = './whisper-encoder.onnx'
tf_model_path = 'model_tf-encoder'
onnx_model = onnx.load(onnx_model_path)
tf_rep = prepare(onnx_model)
tf_rep.export_graph(tf_model_path)
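# Optional sanity checks (sketch, not in the original script): validate the ONNX graph
# and inspect the SavedModel's serving signature before TFLite conversion. The exact
# signature/input names depend on how onnx-tf names the ONNX graph inputs.
onnx.checker.check_model(onnx_model)
print([i.name for i in onnx_model.graph.input])  # expect a single (1, 80, 3000) input
loaded = tf.saved_model.load(tf_model_path)
print(loaded.signatures["serving_default"].structured_input_signature)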
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
from transformers import WhisperProcessor, TFWhisperForConditionalGeneration
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
saved_model_dir = 'model_tf-encoder'
tflite_model_path = 'whisper-encoder-hybrid.tflite'
def representative_dataset_data():
    for x in range(5):
        inputs = processor(ds[x]["audio"]["array"], return_tensors="tf")
        input_features = inputs.input_features
        yield [input_features]
def representative_dataset_random():
    for _ in range(100):
        data = np.random.rand(1, 80, 3000)
        yield [data.astype(np.float32)]
def representative_dataset():
    for _ in range(1):  # Change this to 100 and provide 100 different audio files from a known dataset
        mel_from_file = log_mel_spectrogram('/content/whisper/tests/jfk.flac')
        segment = pad_or_trim(mel_from_file, N_FRAMES)
        segment = tf.expand_dims(segment, 0)
        print(segment.shape)
        yield [segment]
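# Optional sanity check (sketch): each representative sample should be a float32
# tensor of shape (1, 80, 3000), matching the encoder input.
sample = next(iter(representative_dataset_data()))[0]
print(sample.shape, sample.dtype)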
# Convert to a quantized TFLite model (int16 activations / int8 weights)
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
converter.representative_dataset = representative_dataset_data
#converter.inference_input_type = tf.int8 # or tf.uint8
#converter.inference_output_type = tf.int8 # or tf.uint8
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
# Save the model
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)
import tensorflow as tf
import numpy as np
tflite_model_path = '/content/whisper-encoder-hybrid.tflite'
# Load the TFLite model and allocate tensors
interpreter_enc = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter_enc.allocate_tensors()
print("== Input details ==")
print("name:", interpreter_enc.get_input_details()[0]['name'])
print("shape:", interpreter_enc.get_input_details()[0]['shape'])
print("type:", interpreter_enc.get_input_details()[0]['dtype'])
print("\nDUMP INPUT")
print(interpreter_enc.get_input_details()[0])
print("\n== Output details ==")
print("name:", interpreter_enc.get_output_details()[0]['name'])
print("shape:", interpreter_enc.get_output_details()[0]['shape'])
print("type:", interpreter_enc.get_output_details()[0]['dtype'])
print("\nDUMP OUTPUT")
print(interpreter_enc.get_output_details()[0])
# Get input and output tensors
input_details = interpreter_enc.get_input_details()
output_details = interpreter_enc.get_output_details()
output_tensor = interpreter_enc.get_output_details()[0]['index']
# Test the model with random data
input_shape = input_details[0]['shape']
mel_from_file = log_mel_spectrogram('/content/whisper/tests/jfk.flac')
input_tensor = pad_or_trim(mel_from_file, N_FRAMES)
input_tensor = tf.expand_dims(input_tensor, 0)
audio = whisper.load_audio('/content/whisper/tests/jfk.flac')
audio = whisper.pad_or_trim(audio)
mel = whisper.log_mel_spectrogram(audio)
mel = np.expand_dims(mel,0)
#input_tensor = np.array(input_tensor-128, dtype=np.int8)
interpreter_enc.set_tensor(input_details[0]['index'], mel)
interpreter_enc.invoke()
print("Whisper Encoder Inference executed successfully\n")
encoder_output_data = interpreter_enc.get_tensor(output_tensor)
print(encoder_output_data.shape)
print(encoder_output_data)
np.savetxt("encoder_output.txt", encoder_output_data.reshape((3,-1)), fmt="%s", header=str(encoder_output_data.shape))
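# Optional comparison (sketch, assuming tiny_model, mel and encoder_output_data from
# the cells above are still in scope): run the float PyTorch encoder on the same mel
# input to quantify the quantization error of the TFLite encoder.
with torch.no_grad():
    ref_dtype = next(tiny_model.encoder.parameters()).dtype  # fp16 on GPU, fp32 on CPU
    ref = tiny_model.encoder(torch.from_numpy(mel).to(device=device, dtype=ref_dtype))
    ref = ref.float().cpu().numpy()
print("max abs diff: ", np.max(np.abs(ref - encoder_output_data)))
print("mean abs diff:", np.mean(np.abs(ref - encoder_output_data)))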
Option A: Reference colab notebooks
- Reference TensorFlow Model Colab: Demonstrate how to build your TF model.
- Reference TensorFlow Lite Model Colab: Demonstrate how to convert your TF model to a TF Lite model (with quantization, if used) and run TFLite Inference (if possible).
(You can paste links or attach files by dragging & dropping them below)
- Provide links to your updated versions of the above two colab notebooks.
- Provide links to your TensorFlow model and (optionally) TensorFlow Lite Model.
Option B: Paste your code here or provide a link to a custom end-to-end colab
(You can paste links or attach files by dragging & dropping them below)
- Include code to invoke the TFLite Converter Python API and the errors.
- Provide links to your TensorFlow model and (optionally) TensorFlow Lite Model.
3. Failure after conversion
Conversion is successful; however, while running the model I get the error below:

RuntimeError                              Traceback (most recent call last)
<ipython-input-32-17a970b6c12f> in <module>
      5 # Load the TFLite model and allocate tensors
      6 interpreter_enc = tf.lite.Interpreter(model_path=tflite_model_path)
----> 7 interpreter_enc.allocate_tensors()
      8
      9 print("== Input details ==")

/usr/local/lib/python3.8/dist-packages/tensorflow/lite/python/interpreter.py in allocate_tensors(self)
    511   def allocate_tensors(self):
    512     self._ensure_safe()
--> 513     return self._interpreter.AllocateTensors()
    514
    515   def _safe_to_run(self):

RuntimeError: tensorflow/lite/kernels/elementwise.cc:88 Type INT16 is unsupported by op Rsqrt. Node number 34 (RSQRT) failed to prepare. Failed to apply the default TensorFlow Lite delegate indexed at 0.
- Model produces wrong results and/or has lesser accuracy.
- Model produces correct results, but it is slower than expected.
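One possible workaround to try (a sketch based on the TFLite 16x8 quantization documentation, not yet verified on this model): also list the regular float builtins in target_spec.supported_ops, so that ops without an int16 kernel, such as RSQRT, are kept in float32 instead of failing at allocate_tensors():

# Re-convert with float fallback for ops that have no 16x8 (int16) kernel.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_data
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    tf.lite.OpsSet.TFLITE_BUILTINS,  # keep unsupported ops (e.g. RSQRT) in float32
]
tflite_model = converter.convert()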
4. (optional) RNN conversion support
If converting TF RNN to TFLite fused RNN ops, please prefix [RNN] in the title.
5. (optional) Any other info / logs
Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
About this issue
- Original URL
- State: closed
- Created a year ago
- Comments: 24 (14 by maintainers)
@Burton2000 I have managed to generate a partial Whisper encoder model using full int8 with only a few layers remaining in float, and I will share the resulting model sometime next week.
I have yet to find support for int16 Rsqrt on TFLM (TensorFlow Lite Micro). Refer to the link below for more details: https://github.com/tensorflow/tflite-micro/blob/main/tensorflow/lite/micro/kernels/elementwise.cc#L61
bool IsRsqrtSupportedType(const TfLiteType type) {
  return type == kTfLiteFloat32 || type == kTfLiteInt8;
}
@mohantym I used PTQ (post-training quantization) with a representative dataset and generated an int8 model; however, it is producing wrong results. I will share a colab notebook here soon.
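For debugging the accuracy drop, one way to see how much of the converted model actually ended up quantized is to count tensor dtypes in the flatbuffer (a sketch; get_tensor_details() reads static model metadata, though behavior can vary across TF versions):

import collections
interp = tf.lite.Interpreter(model_path='/content/whisper-encoder-hybrid.tflite')
dtype_counts = collections.Counter(t['dtype'].__name__ for t in interp.get_tensor_details())
print(dtype_counts)  # e.g. how many tensors are float32 vs int8 vs int16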