You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
219 lines
11 KiB
219 lines
11 KiB
#include "VideoToAI.hpp" |
|
#include "hailo/hailort_common.hpp" |
|
|
|
namespace VideoToAI |
|
{ |
|
// Platform-specific memory allocation |
|
std::shared_ptr<uint8_t> page_aligned_alloc(size_t size) |
|
{ |
|
#if defined(__unix__) |
|
std::cout << "[DEBUG] Attempting to allocate " << size << " bytes using mmap." << std::endl; |
|
auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); |
|
if (MAP_FAILED == addr) { |
|
std::cerr << "[ERROR] mmap failed to allocate memory. Requested size: " << size << " bytes." << std::endl; |
|
throw std::bad_alloc(); |
|
} |
|
return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [size](void* addr) { munmap(addr, size); }); |
|
|
|
#elif defined(_MSC_VER) |
|
std::cout << "[DEBUG] Attempting to allocate " << size << " bytes using VirtualAlloc." << std::endl; |
|
auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); |
|
if (!addr) { |
|
std::cerr << "[ERROR] VirtualAlloc failed to allocate memory. Requested size: " << size << " bytes." << std::endl; |
|
throw std::bad_alloc(); |
|
} |
|
return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [](void* addr) { VirtualFree(addr, 0, MEM_RELEASE); }); |
|
|
|
#else |
|
#pragma error("Aligned alloc not supported") |
|
#endif |
|
} |
|
|
|
// Resize and pad the image to fit the new shape |
|
void letterbox(const cv::Mat& image, cv::Mat& outImage, |
|
const cv::Size& newShape, const cv::Scalar& color, bool scaleUp) |
|
{ |
|
cv::Size shape = image.size(); |
|
|
|
float r = std::min((float)newShape.height / (float)shape.height, |
|
(float)newShape.width / (float)shape.width); |
|
if (!scaleUp) |
|
r = std::min(r, 1.0f); |
|
|
|
int newWidth = (int)std::round((float)shape.width * r); |
|
int newHeight = (int)std::round((float)shape.height * r); |
|
|
|
auto dw = newShape.width - newWidth; |
|
auto dh = newShape.height - newHeight; |
|
|
|
dw /= 2; |
|
dh /= 2; |
|
|
|
cv::resize(image, outImage, cv::Size(newWidth, newHeight), 0, 0, cv::INTER_LINEAR); |
|
cv::copyMakeBorder(outImage, outImage, int(std::round(dh)), int(std::round(dh)), |
|
int(std::round(dw)), int(std::round(dw)), cv::BORDER_CONSTANT, color); |
|
} |
|
|
|
// Map class IDs to names |
|
std::string get_coco_name_from_int(int cls) |
|
{ |
|
static const std::vector<std::string> cocoNames = { |
|
"__background__", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", |
|
"traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", |
|
"sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", |
|
"suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", |
|
"skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", |
|
"bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", |
|
"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", |
|
"keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", |
|
"scissors", "teddy bear", "hair drier", "toothbrush" |
|
}; |
|
|
|
if (cls >= 0 && cls < cocoNames.size()) { |
|
return cocoNames[cls]; |
|
} |
|
return "Unknown"; |
|
} |
|
|
|
// Perform inference on Y and UV planes |
|
void hailoInference(cv::Mat& yPlane, cv::Mat& uvPlane, ConfiguredInferModel& configured_infer_model, std::shared_ptr<InferModel>& infer_model, VDevice* vdevice, std::vector<DetectionResult>& detection_results) { |
|
try { |
|
std::cout << "[INFO] Starting Hailo inference." << std::endl; |
|
|
|
std::vector<std::shared_ptr<uint8_t>> buffer_guards; |
|
std::vector<DmaMappedBuffer> buffer_map_guards; |
|
|
|
size_t input_frame_size = yPlane.total() + uvPlane.total(); |
|
const auto Y_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 2 / 3); |
|
const auto UV_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 1 / 3); |
|
|
|
// Allocate and map Y-plane buffer |
|
auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE); |
|
memcpy(y_plane_buffer.get(), yPlane.data, Y_PLANE_SIZE); |
|
auto input_mapping_y = DmaMappedBuffer::create(*vdevice, y_plane_buffer.get(), Y_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map Y-plane buffer"); |
|
buffer_guards.push_back(y_plane_buffer); |
|
|
|
// Allocate and map UV-plane buffer |
|
auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE); |
|
memcpy(uv_plane_buffer.get(), uvPlane.data, UV_PLANE_SIZE); |
|
auto input_mapping_uv = DmaMappedBuffer::create(*vdevice, uv_plane_buffer.get(), UV_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map UV-plane buffer"); |
|
buffer_guards.push_back(uv_plane_buffer); |
|
|
|
// Configure input and output buffers |
|
hailo_pix_buffer_t pix_buffer{}; |
|
pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; |
|
pix_buffer.number_of_planes = 2; |
|
pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE; |
|
pix_buffer.planes[0].plane_size = Y_PLANE_SIZE; |
|
pix_buffer.planes[0].user_ptr = reinterpret_cast<void*>(y_plane_buffer.get()); |
|
pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE; |
|
pix_buffer.planes[1].plane_size = UV_PLANE_SIZE; |
|
pix_buffer.planes[1].user_ptr = reinterpret_cast<void*>(uv_plane_buffer.get()); |
|
|
|
auto bindings = configured_infer_model.create_bindings().expect("Failed to create infer bindings"); |
|
auto input_name = infer_model->get_input_names().at(0); |
|
auto input_status = bindings.input(input_name)->set_pix_buffer(pix_buffer); |
|
if (HAILO_SUCCESS != input_status) { |
|
throw hailort_error(input_status, "Failed to set infer input buffer"); |
|
} |
|
|
|
// Allocate output buffer |
|
auto output_name = infer_model->get_output_names().at(0); |
|
size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); |
|
auto output_buffer = page_aligned_alloc(output_frame_size); |
|
buffer_guards.push_back(output_buffer); |
|
auto output_mapping = DmaMappedBuffer::create(*vdevice, output_buffer.get(), output_frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H).expect("Failed to map output buffer"); |
|
buffer_map_guards.push_back(std::move(output_mapping)); |
|
auto output_status = bindings.output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); |
|
if (HAILO_SUCCESS != output_status) { |
|
throw hailort_error(output_status, "Failed to set infer output buffer"); |
|
} |
|
|
|
std::cout << "[INFO] Configured input and output buffers." << std::endl; |
|
|
|
// Run asynchronous inference with a callback |
|
configured_infer_model.run_async(bindings, [&uvPlane, &detection_results, output_buffer](const AsyncInferCompletionInfo& completion_info) { |
|
try { |
|
if (completion_info.status != HAILO_SUCCESS) { |
|
std::cerr << "[ERROR] Asynchronous inference failed with status: " << completion_info.status << std::endl; |
|
return; |
|
} |
|
|
|
std::cout << "[INFO] Asynchronous inference callback triggered." << std::endl; |
|
|
|
// catch model output |
|
const float* detections = reinterpret_cast<const float*>(output_buffer.get()); |
|
if (!detections) { |
|
std::cerr << "[ERROR] Output buffer is null." << std::endl; |
|
return; |
|
} |
|
int max_classes = 8; // Max class num |
|
int u_value = 90, v_value = 50; // Green line |
|
for (int class_id = 1; class_id <= max_classes; class_id++) { |
|
int num_detections = static_cast<int>(detections[0]); // frist float means detection num |
|
detections += 1; |
|
|
|
if (num_detections == 0) { |
|
continue; // if this class no detection result , skip |
|
} |
|
|
|
std::cout << "[INFO] Class " << class_id << ": Number of detections = " << num_detections << std::endl; |
|
|
|
for (int i = 0; i < num_detections; i++) { |
|
hailo_bbox_float32_t bbox = reinterpret_cast<const hailo_bbox_float32_t*>(detections)[i]; |
|
if (bbox.score <= 0.3) { |
|
continue; |
|
} |
|
|
|
std::string class_name = get_coco_name_from_int(class_id); |
|
std::string label = class_name + " " + cv::format("%.2f", bbox.score); |
|
|
|
// draw the range |
|
int x = std::max(0, static_cast<int>(bbox.x_min * uvPlane.cols)); |
|
int y = std::max(0, static_cast<int>(bbox.y_min * uvPlane.cols)); |
|
int w = std::min(static_cast<int>((bbox.x_max - bbox.x_min) * uvPlane.cols), uvPlane.cols - x); |
|
int h = std::min(static_cast<int>((bbox.y_max - bbox.y_min) * uvPlane.cols), uvPlane.cols - y); |
|
|
|
int top = std::max(0, y / 2); |
|
int bottom = std::min((y + h) / 2, uvPlane.cols - 1); |
|
int left = std::max(0, x / 2); |
|
int right = std::min((x + w) / 2, uvPlane.cols - 1); |
|
|
|
uvPlane.row(top).colRange(left * 2, right * 2 + 1).setTo(cv::Scalar(u_value, v_value)); |
|
uvPlane.row(bottom).colRange(left * 2, right * 2 + 1).setTo(cv::Scalar(u_value, v_value)); |
|
uvPlane.col(left * 2).rowRange(top, bottom + 1).setTo(cv::Scalar(v_value, u_value)); |
|
uvPlane.col(right * 2).rowRange(top, bottom + 1).setTo(cv::Scalar(v_value, u_value)); |
|
|
|
// put class name and score |
|
int text_x = left * 2; |
|
int text_y = std::max(0, top - 1); |
|
|
|
cv::putText(uvPlane, label, cv::Point(text_x, text_y), |
|
cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0, 255, 0), 1.2); |
|
DetectionResult result = { bbox, class_id, bbox.score }; |
|
detection_results.push_back(result); |
|
} |
|
|
|
detections += num_detections * (sizeof(hailo_bbox_float32_t) / sizeof(float)); |
|
} |
|
} |
|
catch (const std::exception& e) { |
|
std::cerr << "[ERROR] Exception in callback: " << e.what() << std::endl; |
|
} |
|
}); |
|
|
|
|
|
|
|
} |
|
catch (const hailort_error& e) { |
|
std::cerr << "[ERROR] Hailo inference failed: " << e.what() << std::endl; |
|
throw; |
|
} |
|
catch (const std::exception& e) { |
|
std::cerr << "[ERROR] General error in hailoInference: " << e.what() << std::endl; |
|
throw; |
|
} |
|
} |
|
|
|
|
|
}
|
|
|