libedgetpu: Interpreter->invoke() calls Segmentation Fault
Description
System information
- OS Platform and Distribution: Linux Ubuntu 18.04 with USB Coral, and Raspberry Pi 4 B.
- TensorFlow version : v2.5.0, v2.6.0
Description
When I call initTfLiteInterpreter(), TFLite works correctly: the output has all the information (boxes, labels, and classes) and the invoke time is good (15 ms). But when I call processingFrame(cv::Mat) from another class with equivalent code, I get a segmentation fault on Interpreter->invoke():
// From other class
for(int i = 0; i< 10; i++)
{
cv::Mat testImage = cv::imread(TestClass->EXAMPLE_FRAME);
TestClass->processingFrame(testImage);
}
I get this error with TF 2.5.0, 2.6.0.
Source of my program — TestClass.h:
/// Build EDGE Interpreter for Coral
void BuildEdgeTpuInterpreter(const tflite::FlatBufferModel &model,
edgetpu::EdgeTpuContext *edgetpu_context);
// Load graph to coral
void initTfLiteInterpreter();
// Processing the received frame
void processingFrame(cv::Mat& frame);
int num_threads = 1;
std::unique_ptr<tflite::Interpreter> interpreter;
std::shared_ptr<edgetpu::EdgeTpuContext> tpu_context;
TfLiteTensor* input_tensor;
TfLiteTensor* output_locations;
TfLiteTensor* output_classes;
TfLiteTensor* output_scores;
TfLiteTensor* num_detections_;
int height;
int width;
int channels;
int row_elems;
TestClass.cxx:
void TestClass::BuildEdgeTpuInterpreter(const tflite::FlatBufferModel &model,
edgetpu::EdgeTpuContext *edgetpu_context)
{
tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
if (tflite::InterpreterBuilder(model, resolver)(&interpreter) != kTfLiteOk) {
std::cerr << "Failed to build interpreter." << std::endl;
return;
}
// Allocate tensor buffers.
// Bind given context with interpreter.
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
interpreter->SetNumThreads(1);
if (interpreter->AllocateTensors() != kTfLiteOk)
{
std::cerr << "Failed to allocate tensors." << std::endl;
}
}
void TestClass::initTfLiteInterpreter(void)
{
auto model = tflite::FlatBufferModel::BuildFromFile(GRAPH.c_str());
tpu_context = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
std::cout << "Checking readiness of Coral device" << std::endl;
if(!tpu_context->IsReady())
{
std::cout << "Coral device is not ready" << std::endl;
throw -1;
}
std::cout << "EDGE TPU path: " << tpu_context->GetDeviceEnumRecord().path << std::endl;
BuildEdgeTpuInterpreter(*model, tpu_context.get());
input_tensor = interpreter->tensor(interpreter->inputs()[0]);
output_locations = interpreter->tensor(interpreter->outputs()[0]);
output_classes = interpreter->tensor(interpreter->outputs()[1]);
output_scores = interpreter->tensor(interpreter->outputs()[2]);
num_detections_ = interpreter->tensor(interpreter->outputs()[3]);
height = input_tensor->dims->data[1];
width = input_tensor->dims->data[2];
channels = input_tensor->dims->data[3];
row_elems = width * channels;
for(int i = 0; i< 10; i++)
{
cv::Mat testImage = cv::imread(EXAMPLE_FRAME);
processingFrame(testImage);
}
Utils::dual_write("CNN is ready, example frame was processed");
m_readyFlag.store(true);
}
void TestClass::processingFrame(cv::Mat& frame)
{
Q_ASSERT(q_ptr);
const clock_t begin_time = clock();
QMutexLocker locker(&m_mutex);
qDebug() << "cv mat size: " << width << height;
cvtColor(frame, frame, cv::COLOR_BGR2RGB);
// Resize for model input
cv::resize(frame, frame, cv::Size(width, height));
uint8_t* dst = input_tensor->data.uint8;
for (int row = 0; row < height; row++) {
memcpy(dst, frame.ptr(row), row_elems);
dst += row_elems;
}
if (input_tensor->type != kTfLiteUInt8 || //
input_tensor->dims->data[0] != 1 || //
input_tensor->dims->data[1] != height || //
input_tensor->dims->data[2] != width || //
input_tensor->dims->data[3] != channels) {
std::cerr << "Input tensor shape does not match input image" << std::endl;
return;
}
if(interpreter->Invoke() != kTfLiteOk)
qDebug() << "Invoke is broken";
qDebug() << "Invoke is done!";
const float* detection_locations = output_locations->data.f;
const float* detection_classes = output_classes->data.f;
const float* detection_scores = output_scores->data.f;
const int num_detections = *(num_detections_->data.f);
for (int i = 0; i < num_detections; i++) {
const float score = detection_scores[i];
const std::string label = std::to_string(uint8_t(detection_classes[i]));
const float yMin = detection_locations[4 * i + 0];
const float xMin = detection_locations[4 * i + 1];
const float yMax = detection_locations[4 * i + 2];
const float xMax = detection_locations[4 * i + 3];
if (score > thresholdScore) {
std::cout << label << " score:" << score << std::endl;
emit q_ptr->returnBoundingBoxes(frame, yMin, xMin, yMax, xMax, score, label, true);
}
}
std::cout << "time: " << float( clock () - begin_time ) / CLOCKS_PER_SEC << std::endl;
emit q_ptr->finishedCNNProcessing(frame);
}
Click to expand!
Issue Type
Bug
Operating System
Ubuntu
Coral Device
USB Accelerator
Other Devices
No response
Programming Language
C++
Relevant Log Output
**LOGS**
Checking readiness of Coral device
EDGE TPU path: /sys/bus/usb/devices/2-1
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.022215
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012465
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.011841
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.011659
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.014413
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.011502
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012496
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012136
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012898
cv mat size: 640 480
Invoke is done!
1 score:0.902344
time: 0.012129
Thu Dec 9 11:52:59 2021: CNN is ready, example frame was processed
cv mat size: 640 480
Segmentation fault (core dumped)
**GDB out**
0x000000000067d47c in tflite::ops::custom::detection_postprocess::DecodeCenterSizeBoxes(TfLiteContext*, TfLiteNode*, tflite::ops::custom::detection_postprocess::OpData*)
</details>
About this issue
- Original URL
- State: closed
- Created 3 years ago
- Comments: 20
I managed to solve the problem by using a shared pointer to the interpreter and passing that pointer into the code area that executes invoke.