#include "VideoToAI.hpp" #include "hailo/hailort_common.hpp" namespace VideoToAI { // Platform-specific memory allocation std::shared_ptr page_aligned_alloc(size_t size) { #if defined(__unix__) std::cout << "[DEBUG] Attempting to allocate " << size << " bytes using mmap." << std::endl; auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (MAP_FAILED == addr) { std::cerr << "[ERROR] mmap failed to allocate memory. Requested size: " << size << " bytes." << std::endl; throw std::bad_alloc(); } return std::shared_ptr(reinterpret_cast(addr), [size](void* addr) { munmap(addr, size); }); #elif defined(_MSC_VER) std::cout << "[DEBUG] Attempting to allocate " << size << " bytes using VirtualAlloc." << std::endl; auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); if (!addr) { std::cerr << "[ERROR] VirtualAlloc failed to allocate memory. Requested size: " << size << " bytes." << std::endl; throw std::bad_alloc(); } return std::shared_ptr(reinterpret_cast(addr), [](void* addr) { VirtualFree(addr, 0, MEM_RELEASE); }); #else #pragma error("Aligned alloc not supported") #endif } // Resize and pad the image to fit the new shape void letterbox(const cv::Mat& image, cv::Mat& outImage, const cv::Size& newShape, const cv::Scalar& color, bool scaleUp) { cv::Size shape = image.size(); float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width); if (!scaleUp) r = std::min(r, 1.0f); int newWidth = (int)std::round((float)shape.width * r); int newHeight = (int)std::round((float)shape.height * r); auto dw = newShape.width - newWidth; auto dh = newShape.height - newHeight; dw /= 2; dh /= 2; cv::resize(image, outImage, cv::Size(newWidth, newHeight), 0, 0, cv::INTER_LINEAR); cv::copyMakeBorder(outImage, outImage, int(std::round(dh)), int(std::round(dh)), int(std::round(dw)), int(std::round(dw)), cv::BORDER_CONSTANT, color); } // Map class IDs to names std::string get_coco_name_from_int(int cls) { static const std::vector cocoNames = { "__background__", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" }; if (cls >= 0 && cls < cocoNames.size()) { return cocoNames[cls]; } return "Unknown"; } // Perform inference on Y and UV planes void hailoInference(cv::Mat& yPlane, cv::Mat& uvPlane, ConfiguredInferModel& configured_infer_model, std::shared_ptr& infer_model, VDevice* vdevice, std::vector& detection_results) { try { std::cout << "[INFO] Starting Hailo inference." << std::endl; std::vector> buffer_guards; std::vector buffer_map_guards; size_t input_frame_size = yPlane.total() + uvPlane.total(); const auto Y_PLANE_SIZE = static_cast(input_frame_size * 2 / 3); const auto UV_PLANE_SIZE = static_cast(input_frame_size * 1 / 3); // Allocate and map Y-plane buffer auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE); memcpy(y_plane_buffer.get(), yPlane.data, Y_PLANE_SIZE); auto input_mapping_y = DmaMappedBuffer::create(*vdevice, y_plane_buffer.get(), Y_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map Y-plane buffer"); buffer_guards.push_back(y_plane_buffer); // Allocate and map UV-plane buffer auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE); memcpy(uv_plane_buffer.get(), uvPlane.data, UV_PLANE_SIZE); auto input_mapping_uv = DmaMappedBuffer::create(*vdevice, uv_plane_buffer.get(), UV_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map UV-plane buffer"); buffer_guards.push_back(uv_plane_buffer); // Configure input and output buffers hailo_pix_buffer_t pix_buffer{}; pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; pix_buffer.number_of_planes = 2; pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE; pix_buffer.planes[0].plane_size = Y_PLANE_SIZE; pix_buffer.planes[0].user_ptr = reinterpret_cast(y_plane_buffer.get()); pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE; pix_buffer.planes[1].plane_size = UV_PLANE_SIZE; pix_buffer.planes[1].user_ptr = reinterpret_cast(uv_plane_buffer.get()); auto bindings = configured_infer_model.create_bindings().expect("Failed to create infer bindings"); auto input_name = infer_model->get_input_names().at(0); auto input_status = bindings.input(input_name)->set_pix_buffer(pix_buffer); if (HAILO_SUCCESS != input_status) { throw hailort_error(input_status, "Failed to set infer input buffer"); } // Allocate output buffer auto output_name = infer_model->get_output_names().at(0); size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); auto output_buffer = page_aligned_alloc(output_frame_size); buffer_guards.push_back(output_buffer); auto output_mapping = DmaMappedBuffer::create(*vdevice, output_buffer.get(), output_frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H).expect("Failed to map output buffer"); buffer_map_guards.push_back(std::move(output_mapping)); auto output_status = bindings.output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); if (HAILO_SUCCESS != output_status) { throw hailort_error(output_status, "Failed to set infer output buffer"); } std::cout << "[INFO] Configured input and output buffers." << std::endl; // Run asynchronous inference with a callback configured_infer_model.run_async(bindings, [&uvPlane, &detection_results, output_buffer](const AsyncInferCompletionInfo& completion_info) { try { if (completion_info.status != HAILO_SUCCESS) { std::cerr << "[ERROR] Asynchronous inference failed with status: " << completion_info.status << std::endl; return; } std::cout << "[INFO] Asynchronous inference callback triggered." << std::endl; // catch model output const float* detections = reinterpret_cast(output_buffer.get()); if (!detections) { std::cerr << "[ERROR] Output buffer is null." << std::endl; return; } int max_classes = 8; // Max class num int u_value = 90, v_value = 50; // Green line for (int class_id = 1; class_id <= max_classes; class_id++) { int num_detections = static_cast(detections[0]); // frist float means detection num detections += 1; if (num_detections == 0) { continue; // if this class no detection result , skip } std::cout << "[INFO] Class " << class_id << ": Number of detections = " << num_detections << std::endl; for (int i = 0; i < num_detections; i++) { hailo_bbox_float32_t bbox = reinterpret_cast(detections)[i]; if (bbox.score <= 0.3) { continue; } std::string class_name = get_coco_name_from_int(class_id); std::string label = class_name + " " + cv::format("%.2f", bbox.score); // draw the range int x = std::max(0, static_cast(bbox.x_min * uvPlane.cols)); int y = std::max(0, static_cast(bbox.y_min * uvPlane.cols)); int w = std::min(static_cast((bbox.x_max - bbox.x_min) * uvPlane.cols), uvPlane.cols - x); int h = std::min(static_cast((bbox.y_max - bbox.y_min) * uvPlane.cols), uvPlane.cols - y); int top = std::max(0, y / 2); int bottom = std::min((y + h) / 2, uvPlane.cols - 1); int left = std::max(0, x / 2); int right = std::min((x + w) / 2, uvPlane.cols - 1); uvPlane.row(top).colRange(left * 2, right * 2 + 1).setTo(cv::Scalar(u_value, v_value)); uvPlane.row(bottom).colRange(left * 2, right * 2 + 1).setTo(cv::Scalar(u_value, v_value)); uvPlane.col(left * 2).rowRange(top, bottom + 1).setTo(cv::Scalar(v_value, u_value)); uvPlane.col(right * 2).rowRange(top, bottom + 1).setTo(cv::Scalar(v_value, u_value)); // put class name and score int text_x = left * 2; int text_y = std::max(0, top - 1); cv::putText(uvPlane, label, cv::Point(text_x, text_y), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0, 255, 0), 1.2); DetectionResult result = { bbox, class_id, bbox.score }; detection_results.push_back(result); } detections += num_detections * (sizeof(hailo_bbox_float32_t) / sizeof(float)); } } catch (const std::exception& e) { std::cerr << "[ERROR] Exception in callback: " << e.what() << std::endl; } }); } catch (const hailort_error& e) { std::cerr << "[ERROR] Hailo inference failed: " << e.what() << std::endl; throw; } catch (const std::exception& e) { std::cerr << "[ERROR] General error in hailoInference: " << e.what() << std::endl; throw; } } }