HailoYUV/VideoToAI.cpp

#include "VideoToAI.hpp"
#include "hailo/hailort_common.hpp"

namespace VideoToAI
{
    // Platform-specific memory allocation
    std::shared_ptr<uint8_t> page_aligned_alloc(size_t size)
    {
#if defined(__unix__)
        std::cout << "[DEBUG] Attempting to allocate " << size << " bytes using mmap." << std::endl;
        auto addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        if (MAP_FAILED == addr) {
            std::cerr << "[ERROR] mmap failed to allocate memory. Requested size: " << size << " bytes." << std::endl;
            throw std::bad_alloc();
        }
        return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [size](void* addr) { munmap(addr, size); });

#elif defined(_MSC_VER)
        std::cout << "[DEBUG] Attempting to allocate " << size << " bytes using VirtualAlloc." << std::endl;
        auto addr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
        if (!addr) {
            std::cerr << "[ERROR] VirtualAlloc failed to allocate memory. Requested size: " << size << " bytes." << std::endl;
            throw std::bad_alloc();
        }
        return std::shared_ptr<uint8_t>(reinterpret_cast<uint8_t*>(addr), [](void* addr) { VirtualFree(addr, 0, MEM_RELEASE); });

#else
#pragma error("Aligned alloc not supported")
#endif
    }

    // Resize and pad the image to fit the new shape
    void letterbox(const cv::Mat& image, cv::Mat& outImage,
        const cv::Size& newShape, const cv::Scalar& color, bool scaleUp)
    {
        cv::Size shape = image.size();

        float r = std::min((float)newShape.height / (float)shape.height,
            (float)newShape.width / (float)shape.width);
        if (!scaleUp)
            r = std::min(r, 1.0f);

        int newWidth = (int)std::round((float)shape.width * r);
        int newHeight = (int)std::round((float)shape.height * r);

        auto dw = newShape.width - newWidth;
        auto dh = newShape.height - newHeight;

        dw /= 2;
        dh /= 2;

        cv::resize(image, outImage, cv::Size(newWidth, newHeight), 0, 0, cv::INTER_LINEAR);
        cv::copyMakeBorder(outImage, outImage, int(std::round(dh)), int(std::round(dh)),
            int(std::round(dw)), int(std::round(dw)), cv::BORDER_CONSTANT, color);
    }

    // Map class IDs to names
    std::string get_coco_name_from_int(int cls)
    {
        static const std::vector<std::string> cocoNames = {
            "__background__", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
            "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
            "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
            "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
            "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
            "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
            "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
            "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
            "scissors", "teddy bear", "hair drier", "toothbrush"
        };

        if (cls >= 0 && cls < cocoNames.size()) {
            return cocoNames[cls];
        }
        return "Unknown";
    }

    // Perform inference on Y and UV planes
    void hailoInference(cv::Mat& yPlane, cv::Mat& uvPlane, ConfiguredInferModel& configured_infer_model, std::shared_ptr<InferModel>& infer_model, VDevice* vdevice, std::vector<DetectionResult>& detection_results) {
        try {
            std::cout << "[INFO] Starting Hailo inference." << std::endl;

            std::vector<std::shared_ptr<uint8_t>> buffer_guards;
            std::vector<DmaMappedBuffer> buffer_map_guards;

            size_t input_frame_size = yPlane.total() + uvPlane.total();
            const auto Y_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 2 / 3);
            const auto UV_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 1 / 3);

            // Allocate and map Y-plane buffer
            auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE);
            memcpy(y_plane_buffer.get(), yPlane.data, Y_PLANE_SIZE);
            auto input_mapping_y = DmaMappedBuffer::create(*vdevice, y_plane_buffer.get(), Y_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map Y-plane buffer");
            buffer_guards.push_back(y_plane_buffer);

            // Allocate and map UV-plane buffer
            auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE);
            memcpy(uv_plane_buffer.get(), uvPlane.data, UV_PLANE_SIZE);
            auto input_mapping_uv = DmaMappedBuffer::create(*vdevice, uv_plane_buffer.get(), UV_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map UV-plane buffer");
            buffer_guards.push_back(uv_plane_buffer);

            // Configure input and output buffers
            hailo_pix_buffer_t pix_buffer{};
            pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR;
            pix_buffer.number_of_planes = 2;
            pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE;
            pix_buffer.planes[0].plane_size = Y_PLANE_SIZE;
            pix_buffer.planes[0].user_ptr = reinterpret_cast<void*>(y_plane_buffer.get());
            pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE;
            pix_buffer.planes[1].plane_size = UV_PLANE_SIZE;
            pix_buffer.planes[1].user_ptr = reinterpret_cast<void*>(uv_plane_buffer.get());

            auto bindings = configured_infer_model.create_bindings().expect("Failed to create infer bindings");
            auto input_name = infer_model->get_input_names().at(0);
            auto input_status = bindings.input(input_name)->set_pix_buffer(pix_buffer);
            if (HAILO_SUCCESS != input_status) {
                throw hailort_error(input_status, "Failed to set infer input buffer");
            }

            // Allocate output buffer
            auto output_name = infer_model->get_output_names().at(0);
            size_t output_frame_size = infer_model->output(output_name)->get_frame_size();
            auto output_buffer = page_aligned_alloc(output_frame_size);
            buffer_guards.push_back(output_buffer);
            auto output_mapping = DmaMappedBuffer::create(*vdevice, output_buffer.get(), output_frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H).expect("Failed to map output buffer");
            buffer_map_guards.push_back(std::move(output_mapping));
            auto output_status = bindings.output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size));
            if (HAILO_SUCCESS != output_status) {
                throw hailort_error(output_status, "Failed to set infer output buffer");
            }

            std::cout << "[INFO] Configured input and output buffers." << std::endl;

            // Run asynchronous inference with a callback
            configured_infer_model.run_async(bindings, [&uvPlane, &detection_results, output_buffer](const AsyncInferCompletionInfo& completion_info) {
                try {
                    if (completion_info.status != HAILO_SUCCESS) {
                        std::cerr << "[ERROR] Asynchronous inference failed with status: " << completion_info.status << std::endl;
                        return;
                    }

                    std::cout << "[INFO] Asynchronous inference callback triggered." << std::endl;

                    // catch model output
                    const float* detections = reinterpret_cast<const float*>(output_buffer.get());
                    if (!detections) {
                        std::cerr << "[ERROR] Output buffer is null." << std::endl;
                        return;
                    }
                    int max_classes = 8; // Max class num
                    int u_value = 90, v_value = 50;  // Green line
                    for (int class_id = 1; class_id <= max_classes; class_id++) {
                        int num_detections = static_cast<int>(detections[0]); // frist  float means detection num
                        detections += 1;

                        if (num_detections == 0) {
                            continue; // if this class no detection result , skip
                        }

                        std::cout << "[INFO] Class " << class_id << ": Number of detections = " << num_detections << std::endl;

                        for (int i = 0; i < num_detections; i++) {
                            hailo_bbox_float32_t bbox = reinterpret_cast<const hailo_bbox_float32_t*>(detections)[i];
                            if (bbox.score <= 0.3) {
                                continue;
                            }

                            std::string class_name = get_coco_name_from_int(class_id);
                            std::string label = class_name + " " + cv::format("%.2f", bbox.score);

                            // draw the range
                            int x = std::max(0, static_cast<int>(bbox.x_min * uvPlane.cols));
                            int y = std::max(0, static_cast<int>(bbox.y_min * uvPlane.cols));
                            int w = std::min(static_cast<int>((bbox.x_max - bbox.x_min) * uvPlane.cols), uvPlane.cols - x);
                            int h = std::min(static_cast<int>((bbox.y_max - bbox.y_min) * uvPlane.cols), uvPlane.cols - y);

                            int top = std::max(0, y / 2);
                            int bottom = std::min((y + h) / 2, uvPlane.cols - 1);
                            int left = std::max(0, x / 2);
                            int right = std::min((x + w) / 2, uvPlane.cols - 1);

                            uvPlane.row(top).colRange(left * 2, right * 2 + 1).setTo(cv::Scalar(u_value, v_value));
                            uvPlane.row(bottom).colRange(left * 2, right * 2 + 1).setTo(cv::Scalar(u_value, v_value));
                            uvPlane.col(left * 2).rowRange(top, bottom + 1).setTo(cv::Scalar(v_value, u_value));
                            uvPlane.col(right * 2).rowRange(top, bottom + 1).setTo(cv::Scalar(v_value, u_value));

                            // put class name and score
                            int text_x = left * 2;
                            int text_y = std::max(0, top - 1);

                            cv::putText(uvPlane, label, cv::Point(text_x, text_y),
                                cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0, 255, 0), 1.2);
                            DetectionResult result = { bbox, class_id, bbox.score };
                            detection_results.push_back(result);
                        }

                        detections += num_detections * (sizeof(hailo_bbox_float32_t) / sizeof(float));
                    }
                }
                catch (const std::exception& e) {
                    std::cerr << "[ERROR] Exception in callback: " << e.what() << std::endl;
                }
                });


        }
        catch (const hailort_error& e) {
            std::cerr << "[ERROR] Hailo inference failed: " << e.what() << std::endl;
            throw;
        }
        catch (const std::exception& e) {
            std::cerr << "[ERROR] General error in hailoInference: " << e.what() << std::endl;
            throw;
        }
    }


}