libedgetpu: Interpreter->invoke() calls Segmentation Fault
Description
System information
- OS Platform and Distribution: Linux Ubuntu 18.04 with USB Coral, and Raspberry Pi 4 B.
- TensorFlow version : v2.5.0, v2.6.0
Description
When I call initTfLiteInterpreter(), TFLite works correctly: the output has all the information (boxes, labels, and classes) and the invoke time is good (15 ms). But when I call processingFrame(cv::Mat) from another class with equivalent code, I get a segmentation fault on Interpreter->invoke():
// From other class
for(int i = 0; i< 10; i++)
{
cv::Mat testImage = cv::imread(TestClass->EXAMPLE_FRAME);
TestClass->processingFrame(testImage);
}
I get this error with TF 2.5.0, 2.6.0.
Source of my program — TestClass.h:
/// Build EDGE Interpreter for Coral
void BuildEdgeTpuInterpreter(const tflite::FlatBufferModel &model,
edgetpu::EdgeTpuContext *edgetpu_context);
// Load graph to coral
void initTfLiteInterpreter();
// Processing the received frame
void processingFrame(cv::Mat& frame);
int num_threads = 1;
std::unique_ptr<tflite::Interpreter> interpreter;
std::shared_ptr<edgetpu::EdgeTpuContext> tpu_context;
TfLiteTensor* input_tensor;
TfLiteTensor* output_locations;
TfLiteTensor* output_classes;
TfLiteTensor* output_scores;
TfLiteTensor* num_detections_;
int height;
int width;
int channels;
int row_elems;
TestClass.cxx:
void TestClass::BuildEdgeTpuInterpreter(const tflite::FlatBufferModel &model,
edgetpu::EdgeTpuContext *edgetpu_context)
{
tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
if (tflite::InterpreterBuilder(model, resolver)(&interpreter) != kTfLiteOk) {
std::cerr << "Failed to build interpreter." << std::endl;
return;
}
// Allocate tensor buffers.
// Bind given context with interpreter.
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
interpreter->SetNumThreads(1);
if (interpreter->AllocateTensors() != kTfLiteOk)
{
std::cerr << "Failed to allocate tensors." << std::endl;
}
}
void TestClass::initTfLiteInterpreter(void)
{
auto model = tflite::FlatBufferModel::BuildFromFile(GRAPH.c_str());
tpu_context = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
std::cout << "Checking readiness of Coral device" << std::endl;
if(!tpu_context->IsReady())
{
std::cout << "Coral device is not ready" << std::endl;
throw -1;
}
std::cout << "EDGE TPU path: " << tpu_context->GetDeviceEnumRecord().path << std::endl;
BuildEdgeTpuInterpreter(*model, tpu_context.get());
input_tensor = interpreter->tensor(interpreter->inputs()[0]);
output_locations = interpreter->tensor(interpreter->outputs()[0]);
output_classes = interpreter->tensor(interpreter->outputs()[1]);
output_scores = interpreter->tensor(interpreter->outputs()[2]);
num_detections_ = interpreter->tensor(interpreter->outputs()[3]);
height = input_tensor->dims->data[1];
width = input_tensor->dims->data[2];
channels = input_tensor->dims->data[3];
row_elems = width * channels;
for(int i = 0; i< 10; i++)
{
cv::Mat testImage = cv::imread(EXAMPLE_FRAME);
processingFrame(testImage);
}
Utils::dual_write("CNN is ready, example frame was processed");
m_readyFlag.store(true);
}
void TestClass::processingFrame(cv::Mat& frame)
{
Q_ASSERT(q_ptr);
const clock_t begin_time = clock();
QMutexLocker locker(&m_mutex);
qDebug() << "cv mat size: " << width << height;
cvtColor(frame, frame, cv::COLOR_BGR2RGB);
// Resize for model input
cv::resize(frame, frame, cv::Size(width, height));
uint8_t* dst = input_tensor->data.uint8;
for (int row = 0; row < height; row++) {
memcpy(dst, frame.ptr(row), row_elems);
dst += row_elems;
}
if (input_tensor->type != kTfLiteUInt8 || //
input_tensor->dims->data[0] != 1 || //
input_tensor->dims->data[1] != height || //
input_tensor->dims->data[2] != width || //
input_tensor->dims->data[3] != channels) {
std::cerr << "Input tensor shape does not match input image" << std::endl;
return;
}
if(interpreter->Invoke() != kTfLiteOk)
qDebug() << "Invoke is broken";
qDebug() << "Invoke is done!";
const float* detection_locations = output_locations->data.f;
const float* detection_classes = output_classes->data.f;
const float* detection_scores = output_scores->data.f;
const int num_detections = *(num_detections_->data.f);
for (int i = 0; i < num_detections; i++) {
const float score = detection_scores[i];
const std::string label = std::to_string(uint8_t(detection_classes[i]));
const float yMin = detection_locations[4 * i + 0];
const float xMin = detection_locations[4 * i + 1];
const float yMax = detection_locations[4 * i + 2];
const float xMax = detection_locations[4 * i + 3];
if (score > thresholdScore) {
std::cout << label << " score:" << score << std::endl;
emit q_ptr->returnBoundingBoxes(frame, yMin, xMin, yMax, xMax, score, label, true);
}
}
std::cout << "time: " << float( clock () - begin_time ) / CLOCKS_PER_SEC << std::endl;
emit q_ptr->finishedCNNProcessing(frame);
}
Click to expand!
Issue Type
Bug
Operating System
Ubuntu
Coral Device
USB Accelerator
Other Devices
No response
Programming Language
C++
Relevant Log Output
**LOGS**
Checking readiness of Coral device
EDGE TPU path: /sys/bus/usb/devices/2-1
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.022215
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012465
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.011841
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.011659
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.014413
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.011502
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012496
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012136
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012898
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012129
Thu Dec 9 11:52:59 2021: CNN is ready, example frame was processed
cv mat size: 640 480
Segmentation fault (core dumped)
**GDB out**
0x000000000067d47c in tflite::ops::custom::detection_postprocess::DecodeCenterSizeBoxes(TfLiteContext*, TfLiteNode*, tflite::ops::custom::detection_postprocess::OpData*)
</details>
About this issue
- Original URL
- State: closed
- Created 3 years ago
- Comments: 20
I managed to solve the problem by using a shared pointer to the interpreter and passing that pointer into the code area that executes invoke.