openvino: [Bug] TBB Warning: Exact exception propagation is requested by application but the linked library is built without support for it

System information (version)
  • OpenVINO => Build number: 2022.3.0-9052-9752fafe8eb-releases/2022/3
  • Operating System / Platform => CentOS 7
  • Compiler => GCC 4.8
  • Problem classification: Model Inference locally
  • Framework: IR
  • Model name: Quantized custom tensorflow model to IR
Detailed description

I am currently using Go to call the OpenVINO C API. I create several inference requests and push them to a Go channel. When handling a client request, I take an idle inference request from the channel and use it to perform inference; once the inference completes, I send the request back to the channel.
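
This borrow/return pattern looks roughly as follows (a minimal sketch; OVInferQ is the channel element type used in the code below, and the handler body is illustrative):

func handleRequest(pool chan OVInferQ) {
	ireq := <-pool                  // take an idle infer request from the pool
	defer func() { pool <- ireq }() // return it once inference is done
	// ... run inference with ireq here ...
}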

At the beginning, the stress test behaves as expected. After approximately 300 to 1000 requests, however, a TBB warning appears:

TBB Warning: Exact exception propagation is requested by application but the linked library is built without support for it

Additionally, the model’s output size is wrong. For instance, the input shape is (36, 37), where 36 is the batch size and 37 is the sequence length. Given that the model’s hidden dimension is 146, the output size should be 36 * 37 * 146 = 194,472 elements. Nevertheless, I am getting 138,116.
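
The size check is just the product of the three dimensions (a hypothetical helper, not part of the service code):

func expectedFlatSize(batchSize, seqLen, hidden int) int {
	return batchSize * seqLen * hidden // 36 * 37 * 146 = 194472
}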

Steps to reproduce

Load model in C

ov_compiled_model_t *LoadModel(const char *IRModelPath)
{
    ov_core_t *core = NULL;
    ov_model_t *model = NULL;
    ov_partial_shape_t partial_shape;
    ov_compiled_model_t *compiled_model = NULL;

    // core
    CHECK_STATUS(ov_core_create(&core));

    // read model
    ov_core_read_model(core, IRModelPath, NULL, &model);

    // set property
    const char* key = ov_property_key_hint_performance_mode;
    // const char* value="THROUGHPUT";
    const char* value="LATENCY";

    // dynamic shape
    ov_dimension_t ddims[2] = {{1, 150}, {1, 512}};
    ov_partial_shape_create(2, ddims, &partial_shape);
    ov_model_reshape_input_by_name(model, "input_token", partial_shape);
    ov_model_reshape_input_by_name(model, "input_segment", partial_shape);

    // compile model (property_args_size counts the variadic args: one key + one value = 2)
    ov_core_compile_model(core, model, "CPU", 2, &compiled_model, key, value);

    // free
    ov_partial_shape_free(&partial_shape);
    ov_model_free(model);
    ov_core_free(core);

    return compiled_model;
}

Create infer request channel in Go

func LoadModel(model *FTModel, IRModelPath string, num_infer int) {
	model_path := C.CString(IRModelPath)
	compiled_model := C.LoadModel(model_path)
	C.free(unsafe.Pointer(model_path))

	for i := 0; i < num_infer; i++ {
		infer_request := OVInferQ{Id: i}
		C.ov_compiled_model_create_infer_request(compiled_model, &infer_request.OV_infer_request_t)
		model.OV_ireq_chan <- infer_request
	}
}
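
The Go-side types this relies on look roughly like the following (a sketch; only the fields referenced in this issue are shown):

type OVInferQ struct {
	Id                 int
	OV_infer_request_t *C.ov_infer_request_t
}

type FTModel struct {
	OV_ireq_chan chan OVInferQ // buffered pool of idle infer requests
}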

Inference in C

struct ner_tuple_result DoInference(ov_infer_request_t *infer_request, float *token_ids, float *senment_ids, int batch_size, int seq_len)
{
    ov_shape_t input_token_shape;
    ov_shape_t input_segment_shape;
    ov_tensor_t *token_tensor = NULL;
    ov_tensor_t *segment_tensor = NULL;
    ov_tensor_t *ner_dense_output_tensor = NULL;
    ov_tensor_t *tuple_dense_output_tensor = NULL;

    // input data type
    ov_element_type_e input_token_type = F32; // F32
    ov_element_type_e input_segment_type = F32;

    // create input tensor
    const int batch_size_ = batch_size;
    const int seq_len_ = seq_len;
    float ids[batch_size_][seq_len_];
    float segs[batch_size_][seq_len_];
    int i, j;
    for (i = 0; i < batch_size_; i++)
    {
        for (j = 0; j < seq_len_; j++)
        {
            ids[i][j] = token_ids[i * batch_size_ + j];
            segs[i][j] = senment_ids[i * batch_size_ + j];
        }
    };
    int64_t dims[2] = {batch_size_, seq_len_};
    ov_shape_create(2, dims, &input_token_shape);
    ov_shape_create(2, dims, &input_segment_shape);
    ov_tensor_create_from_host_ptr(input_token_type, input_token_shape, ids, &token_tensor);
    ov_tensor_create_from_host_ptr(input_segment_type, input_segment_shape, segs, &segment_tensor);

    // set input tensor to infer request
    ov_infer_request_set_tensor(infer_request, "input_token", token_tensor);
    ov_infer_request_set_tensor(infer_request, "input_segment", segment_tensor);

    // start
    // ov_infer_request_start_async(infer_request);
    // ov_infer_request_wait(infer_request);
    ov_infer_request_infer(infer_request);

    // get output tensor
    ov_infer_request_get_output_tensor_by_index(infer_request, 1, &ner_dense_output_tensor);
    ov_infer_request_get_output_tensor_by_index(infer_request, 2, &tuple_dense_output_tensor);

    // get output data
    void *ner_dense_data = NULL;
    ov_tensor_data(ner_dense_output_tensor, &ner_dense_data);
    void *tuple_dense_data = NULL;
    ov_tensor_data(tuple_dense_output_tensor, &tuple_dense_data);

    float *ner_dense_float_data = (float *)(ner_dense_data);
    float *tuple_dense_float_data = (float *)(tuple_dense_data);

    // get output data size
    size_t ner_dense_size;
    ov_tensor_get_size(ner_dense_output_tensor, &ner_dense_size);
    size_t tuple_dense_size;
    ov_tensor_get_size(tuple_dense_output_tensor, &tuple_dense_size);

    // return struct
    struct ner_tuple_result result;
    result.ner_dense = ner_dense_float_data;
    result.tuple_dense = tuple_dense_float_data;
    result.ner_flat_size = ner_dense_size;
    result.tuple_flat_size = tuple_dense_size;
    if (ner_dense_size != batch_size_ * seq_len_ * 146){
        printf("C bad ner_dense_size %zd, batch_size: %d, seq_len: %d \n", ner_dense_size, batch_size_, seq_len_);
    }

    // free
    ov_tensor_free(ner_dense_output_tensor);
    ov_tensor_free(tuple_dense_output_tensor);
    ov_tensor_free(segment_tensor);
    ov_tensor_free(token_tensor);
    ov_shape_free(&input_token_shape);
    ov_shape_free(&input_segment_shape);

    return result;
}

Inference in Go

func (model *FTModel) OVInference(tokenIds [][]float32, segIds [][]float32) ([][][]float32, [][][]float32) {
	ireq := <-model.OV_ireq_chan
	fmt.Println("idle_ireq_id", ireq.Id)

	batch_size := len(tokenIds)
	seq_len := len(tokenIds[0])
	rlt := DoInference(ireq.OV_infer_request_t, tokenIds, segIds, batch_size, seq_len)
	model.OV_ireq_chan <- ireq
	if len(rlt.ner_mat) != batch_size || len(rlt.ner_mat[0]) != seq_len || len(rlt.ner_mat[0][0]) != 146 {
		fmt.Println(batch_size, seq_len, len(rlt.ner_mat), len(rlt.ner_mat[0]), len(rlt.ner_mat[0][0]))
	}
	return rlt.ner_mat, rlt.tuple_mat
}
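
The conversion from the flat C buffer to rlt.ner_mat is not shown here; it behaves roughly like this sketch (the helper name is hypothetical):

func toMat(flat []float32, batchSize, seqLen, hidden int) [][][]float32 {
	mat := make([][][]float32, batchSize)
	for i := range mat {
		mat[i] = make([][]float32, seqLen)
		for j := range mat[i] {
			off := (i*seqLen + j) * hidden
			mat[i][j] = flat[off : off+hidden]
		}
	}
	return mat
}
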
Issue submission checklist
  • I report the issue, it’s not a question
  • I checked the problem with documentation, FAQ, open issues, Stack Overflow, etc., and have not found a solution
  • There is reproducer code and related data files: images, videos, models, etc.

About this issue

  • State: closed
  • Created a year ago
  • Comments: 25 (12 by maintainers)

Most upvoted comments

@xyangk I reproduced the same issue as you, did a quick debug, and found the bug is in the example code you provided: there is an out-of-bounds memory access in the input-copy loop. Please see my fix:

    for (i = 0; i < batch_size_; i++)
    {
        for (j = 0; j < seq_len_; j++)
        {
            ids[i][j] = token_ids[i * batch_size_ + j];
            segs[i][j] = senment_ids[i * batch_size_ + j];
        }
    };

change to:

    for (i = 0; i < batch_size_; i++)
    {
        for (j = 0; j < seq_len_; j++)
        {
            ids[i][j] = token_ids[i * seq_len_ + j];
            segs[i][j] = senment_ids[i * seq_len_ + j];
        }
    };
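
Why this reads out of bounds: the flat input holds batch_size_ * seq_len_ floats laid out row-major, so element (i, j) lives at offset i * seq_len_ + j. Indexing with i * batch_size_ + j walks with the wrong stride; whenever batch_size_ is larger than seq_len_, the last rows read past the end of the buffer, and even when the access stays in bounds the wrong elements get copied. A quick check with one of the shapes from the log below:

// Largest flat index each variant touches for a batch_size x seq_len input.
func maxIndex(batchSize, seqLen, stride int) int {
	return (batchSize-1)*stride + (seqLen - 1)
}

// For batch_size 78, seq_len 33:
//   buffer length:       78 * 33 = 2574
//   maxIndex(78, 33, 78) = 6038 (buggy stride)  -> far past the buffer
//   maxIndex(78, 33, 33) = 2573 (fixed stride)  -> last valid index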

After fixing the above issue, the test example runs successfully:

...
ner_dense_size 821688, batch_size: 42, seq_len: 134, right: 1
ner_dense_size 2705526, batch_size: 71, seq_len: 261, right: 1
ner_dense_size 273312, batch_size: 18, seq_len: 104, right: 1
ner_dense_size 3579336, batch_size: 54, seq_len: 454, right: 1
ner_dense_size 2336, batch_size: 2, seq_len: 8, right: 1
ner_dense_size 3016944, batch_size: 56, seq_len: 369, right: 1
ner_dense_size 2538210, batch_size: 57, seq_len: 305, right: 1
ner_dense_size 296088, batch_size: 12, seq_len: 169, right: 1
ner_dense_size 57816, batch_size: 3, seq_len: 132, right: 1
ner_dense_size 138116, batch_size: 2, seq_len: 473, right: 1
ner_dense_size 277400, batch_size: 5, seq_len: 380, right: 1
ner_dense_size 11826, batch_size: 9, seq_len: 9, right: 1
ner_dense_size 375804, batch_size: 78, seq_len: 33, right: 1
ner_dense_size 1193988, batch_size: 47, seq_len: 174, right: 1
ner_dense_size 2976210, batch_size: 45, seq_len: 453, right: 1
ner_dense_size 462528, batch_size: 22, seq_len: 144, right: 1
ner_dense_size 2182554, batch_size: 99, seq_len: 151, right: 1
ner_dense_size 2420096, batch_size: 64, seq_len: 259, right: 1
ner_dense_size 82928, batch_size: 4, seq_len: 142, right: 1
ner_dense_size 352590, batch_size: 35, seq_len: 69, right: 1
took 296574 ms, count: 1000

So I think there is no bug in OpenVINO, right?

OK, I can build it for you. At the same time, could you also print the exception content in: https://github.com/openvinotoolkit/openvino/blob/6bf2fe11aeb891eb66db37932df281a982f90369/src/bindings/c/src/common.h#L18-L24

Sorry, I’m a novice in C. Could you explain how to use this?

You can modify it like this:

#include "openvino/core/except.hpp"
#include "openvino/openvino.hpp"

#define CATCH_IE_EXCEPTION(StatusCode, ExceptionType)         \
    catch (const InferenceEngine::ExceptionType& ex) {        \
        std::cout << "Exception: " << ex.what() << std::endl; \
        return ov_status_e::StatusCode;                       \
    }
#define CATCH_OV_EXCEPTION(StatusCode, ExceptionType)         \
    catch (const ov::ExceptionType& ex) {                     \
        std::cout << "Exception: " << ex.what() << std::endl; \
        return ov_status_e::StatusCode;                       \
    }

#define CATCH_OV_EXCEPTIONS                                   \
    /* ... other exception handlers unchanged ... */          \
    CATCH_IE_EXCEPTION(INFER_NOT_STARTED, InferNotStarted)    \
    CATCH_IE_EXCEPTION(NETWORK_NOT_READ, NetworkNotRead)      \
    CATCH_IE_EXCEPTION(INFER_CANCELLED, InferCancelled)       \
    catch (const std::exception& ex) {                        \
        std::cout << "Exception: " << ex.what() << std::endl; \
        return ov_status_e::UNKNOW_EXCEPTION;                 \
    }

You can apply below patch to print exception information:

diff --git a/src/bindings/c/src/common.h b/src/bindings/c/src/common.h
index dda5513cae..06edcd5a29 100644
--- a/src/bindings/c/src/common.h
+++ b/src/bindings/c/src/common.h
@@ -39,7 +39,8 @@
     CATCH_IE_EXCEPTION(INFER_NOT_STARTED, InferNotStarted)    \
     CATCH_IE_EXCEPTION(NETWORK_NOT_READ, NetworkNotRead)      \
     CATCH_IE_EXCEPTION(INFER_CANCELLED, InferCancelled)       \
-    catch (...) {                                             \
+    catch (const std::exception& ex) {                        \
+        std::cout << "Exception: " << ex.what() << std::endl; \
         return ov_status_e::UNKNOW_EXCEPTION;                 \
     }

If needed, I can build an engineering-test version for you with this patch.

Good finding! It looks like a memory-pointer issue during the memory copy, which leads to memory being skipped and overwritten.