# HazardInspector/lib/qwen_fun_vid.py

import os
from pathlib import Path
import numpy as np
import supervision as sv
import cv2
from concurrent.futures import ThreadPoolExecutor, as_completed

import os
import cv2
from pathlib import Path

def generate_video_to_objects(
    obj_dict: dict[str, dict],
    input_video_path: str,
    output_dir: str,
) -> None:
    """
    Cut one clip per tracked object out of the source video.

    Each object's frame span is padded by a few frames on both sides and
    stretched to a minimum duration of two seconds. The source video is then
    decoded once, and every frame is appended to all clips whose span
    contains it. Clips are written to ``<output_dir>/obj_<track_id:03d>.mp4``.

    :param obj_dict: mapping of track id to object info; each value must
        provide ``"start_frame"`` and ``"end_frame"``, e.g.
        ``"0": {"class_id": 4, "start_frame": 551, "end_frame": 597}``.
        Track ids must be convertible with ``int()``.
    :param input_video_path: path of the source video file
    :param output_dir: directory that receives the generated clips
        (created if missing)
    :raises RuntimeError: if the source video cannot be opened or a clip
        writer cannot be initialised
    """
    # Minimum clip duration in seconds.
    min_seconds = 2
    # Extra context frames added before and after each object's span.
    extra_frames = 5

    print(f"开始抽取物体视频: {input_video_path}")
    print(f"输出目录: {output_dir}")
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # 1. Open the source video (raises if no backend can open it).
    cap = _open_capture_with_fallback(input_video_path)

    writers = {}         # track_id -> cv2.VideoWriter
    obj_ranges = {}      # track_id -> (first frame, last frame), inclusive
    frames_written = {}  # track_id -> number of frames actually written

    try:
        # 2. Read the video parameters.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        min_frames = int(min_seconds * fps)
        # A non-positive frame count means the count is unknown; in that case
        # the spans are not clamped to the end of the video.
        last_frame = total_frames - 1 if total_frames > 0 else None

        fourcc = cv2.VideoWriter_fourcc(*'avc1')

        # 3. Compute every object's span and open its writer.
        total_objects = len(obj_dict)
        print(f"总共需要处理 {total_objects} 个物体")

        for track_id, track_data in obj_dict.items():
            start_idx, end_idx = _clip_frame_range(
                track_data["start_frame"],
                track_data["end_frame"],
                extra_frames,
                min_frames,
                last_frame,
            )

            output_path = os.path.join(output_dir, f"obj_{int(track_id):03d}.mp4")
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            # Register the writer before checking it so the ``finally`` below
            # releases it even when initialisation failed.
            writers[track_id] = out

            if not out.isOpened():
                raise RuntimeError(f"无法初始化视频写入器: {output_path}")

            obj_ranges[track_id] = (start_idx, end_idx)
            frames_written[track_id] = 0
            print(f"物体 {track_id}: 帧 {start_idx} ~ {end_idx} -> {output_path}")

        # 4. Single pass over the video, feeding every writer that needs the
        #    current frame. Decoding stops after the last frame any clip needs.
        last_needed = max((end for _start, end in obj_ranges.values()), default=-1)
        frame_idx = 0
        print_interval = 50

        while frame_idx <= last_needed:
            ret, frame = cap.read()
            if not ret:
                break

            for track_id, (start, end) in obj_ranges.items():
                if start <= frame_idx <= end:
                    writers[track_id].write(frame)
                    frames_written[track_id] += 1

            if frame_idx % print_interval == 0:
                # Guard against containers that report a frame count of 0.
                progress = (frame_idx / total_frames) * 100 if total_frames > 0 else 0.0
                print(f"处理帧: {frame_idx}/{total_frames} ({progress:.1f}%)")

            frame_idx += 1
    finally:
        # 5. Always release writers and the capture, including on errors.
        for out in writers.values():
            out.release()
        cap.release()

    # Report the number of frames that were really written, which can be lower
    # than the planned span if the video ends early.
    for track_id, written in frames_written.items():
        print(f"物体 {track_id} 完成 | 总帧数: {written}")

    # No cv2.destroyAllWindows() here: this function never creates a window.
    # NOTE(review): the call is believed to raise on headless OpenCV builds — confirm.
    print(f"\n✅ 所有物体视频生成完成！目录: {output_dir}")


def _open_capture_with_fallback(video_path: str):
    """Open *video_path* with the first working backend (FFmpeg, DirectShow, default)."""
    backends = [
        (cv2.CAP_FFMPEG, 'FFmpeg'),
        (cv2.CAP_DSHOW, 'DirectShow'),
        (cv2.CAP_ANY, 'Default'),
    ]

    for backend, backend_name in backends:
        cap = None
        try:
            cap = cv2.VideoCapture(video_path, backend)
            if cap.isOpened():
                if backend == cv2.CAP_FFMPEG:
                    # Best effort only; a failure here must not prevent decoding.
                    # NOTE(review): OpenCV documents CAP_PROP_HW_ACCELERATION as
                    # an open-time property, so setting it after opening may
                    # have no effect — confirm.
                    try:
                        cap.set(cv2.CAP_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY)
                        hw_accel = cap.get(cv2.CAP_PROP_HW_ACCELERATION)
                        print(f"FFmpeg硬件加速: {'已启用' if hw_accel > 0 else '未启用'}")
                    except Exception as e:
                        print(f"设置硬件加速失败: {e}")
                print(f"使用后端: {backend_name}")
                return cap
        except Exception as e:
            print(f"尝试{backend_name}后端失败: {e}")

        # This backend did not work: free its capture before trying the next.
        if cap is not None:
            cap.release()

    raise RuntimeError(f"无法打开视频文件: {video_path}")


def _clip_frame_range(start_frame: int, end_frame: int, extra_frames: int,
                      min_frames: int, last_frame=None) -> tuple[int, int]:
    """Pad a frame span, stretch it to *min_frames*, and keep it inside the video.

    *last_frame* is the index of the final frame, or ``None`` when unknown.
    """
    start_idx = max(0, start_frame - extra_frames)
    end_idx = end_frame + extra_frames

    # Enforce the minimum length by extending forwards first.
    if end_idx - start_idx + 1 < min_frames:
        end_idx = start_idx + min_frames - 1

    if last_frame is not None and end_idx > last_frame:
        end_idx = last_frame
        # Near the end of the video there is no room to extend forwards, so
        # recover the missing length by starting earlier instead.
        if end_idx - start_idx + 1 < min_frames:
            start_idx = max(0, end_idx - min_frames + 1)

    return start_idx, end_idx

def process_track_id(
    track_id: int,
    frame_list: list[tuple[int, list[int]]],
    input_video_path: str,
    output_video_root: str,
    frame_width: int,
    frame_height: int,
    target_fps: int,
    frame_interval: int
) -> str:
    """
    Render the video for a single track id.

    Every ``frame_interval``-th output frame shows one detection of the
    object: the source frame is read, everything outside the bounding box is
    blacked out, and the box content stays at its original position. All
    other output frames are black. The video is padded with black frames to
    last at least two seconds at ``target_fps``. The result is written to
    ``<output_video_root>/<track_id>.mp4``.

    :param track_id: id of the tracked object, used as the file name
    :param frame_list: ``(source_frame_index, [x1, y1, x2, y2])`` pairs, used
        in the order given
    :param input_video_path: path of the source video; opened inside this
        function, so each call has its own capture
    :param output_video_root: directory receiving the output video
    :param frame_width: width of the source and output frames in pixels
    :param frame_height: height of the source and output frames in pixels
    :param target_fps: frame rate of the output video
    :param frame_interval: spacing, in output frames, between object frames
    :return: a status string starting with ``"成功"`` or ``"失败"``
    :raises ValueError: if ``frame_interval`` is smaller than 1
    """
    if frame_interval < 1:
        # Fail with a clear message instead of a modulo-by-zero in the loop.
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    # Total output length: enough slots for every object frame, and at least
    # two seconds at the output frame rate.
    min_frames_for_2s = int(2 * target_fps)
    object_based_frames = len(frame_list) * frame_interval
    max_output_frame = max(object_based_frames, min_frames_for_2s)

    output_path = os.path.join(output_video_root, f"{track_id}.mp4")
    frame_size = (frame_width, frame_height)

    # Prefer H.264 and fall back to MPEG-4 when that writer cannot be opened.
    out = None
    for codec in ('h264', 'mp4v'):
        try:
            fourcc = cv2.VideoWriter_fourcc(*codec)  # type: ignore
            candidate = cv2.VideoWriter(output_path, fourcc, target_fps, frame_size)
        except Exception:
            continue
        if candidate.isOpened():
            out = candidate
            break
        # Release the unusable writer before trying the next codec.
        candidate.release()

    if out is None:
        print(f"无法创建视频文件: {output_path}")
        return f"失败: {output_path}"

    cap = None
    try:
        # Each call opens its own capture, so nothing is shared across threads.
        cap = cv2.VideoCapture(input_video_path)
        if not cap.isOpened():
            return f"失败: 无法打开视频 {input_video_path}"

        # One shared black frame for all filler frames; it is only read,
        # never modified.
        black_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
        obj_frame_idx = 0

        for current_output_frame in range(max_output_frame):
            output_frame = black_frame

            is_object_slot = current_output_frame % frame_interval == 0
            if is_object_slot and obj_frame_idx < len(frame_list):
                original_frame_id, xyxy = frame_list[obj_frame_idx]
                obj_frame_idx += 1

                # Detections may skip frames, so seek explicitly.
                cap.set(cv2.CAP_PROP_POS_FRAMES, original_frame_id)
                ret, frame = cap.read()

                # A failed read leaves the slot black.
                if ret:
                    x1, y1, x2, y2 = map(int, xyxy)
                    # Clamp the box to the frame.
                    x1 = max(0, min(x1, frame_width))
                    y1 = max(0, min(y1, frame_height))
                    x2 = max(0, min(x2, frame_width))
                    y2 = max(0, min(y2, frame_height))

                    # An empty box also leaves the slot black.
                    if x2 > x1 and y2 > y1:
                        output_frame = black_frame.copy()
                        output_frame[y1:y2, x1:x2] = frame[y1:y2, x1:x2]

            out.write(output_frame)
    finally:
        # Always finalise the file and free the decoder, including on errors.
        out.release()
        if cap is not None:
            cap.release()

    print(f"已生成视频: {output_path}, 共 {max_output_frame} 帧")
    return f"成功: {output_path}"


def frame_all_to_obj_vid(
    json_data: dict,
    input_video_path: str,
    output_video_root: str,
) -> None:
    """
    Generate one "AI-readable" video per tracked object from annotation data.

    The detections are grouped by track id, and every track is rendered by
    ``process_track_id`` in a thread pool. Failures of individual tracks are
    printed and do not abort the remaining tracks. If ``json_data`` contains
    no detections, nothing is rendered and the function returns after
    validating the video.

    Args:
        json_data: annotation data mapping a frame index (convertible with
            ``int()``) to a list of detection dicts. Each detection is read
            through ``"track_id"`` (default ``-1``) and ``"xyxy"``
            (default ``[0, 0, 0, 0]``).
        input_video_path: path of the source video.
        output_video_root: directory receiving the per-track videos (created
            if missing).

    Raises:
        ValueError: if the source video cannot be opened.
    """
    os.makedirs(output_video_root, exist_ok=True)

    # 1. Group detections by track id.
    # NOTE(review): frames are kept in json_data iteration order; confirm the
    # keys arrive in ascending frame order.
    track_dict: dict[int, list[tuple[int, list[int]]]] = {}
    for frame_id_str, detections in json_data.items():
        frame_id = int(frame_id_str)
        for det in detections:
            track_id = det.get("track_id", -1)
            xyxy = det.get("xyxy", [0, 0, 0, 0])
            track_dict.setdefault(track_id, []).append((frame_id, xyxy))

    # 2. Read the frame size once; the workers open their own captures.
    temp_cap = cv2.VideoCapture(input_video_path)
    try:
        if not temp_cap.isOpened():
            raise ValueError(f"无法打开视频: {input_video_path}")
        frame_width = int(temp_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(temp_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        temp_cap.release()

    # With no tracks the pool size below would be 0, which
    # ThreadPoolExecutor rejects with ValueError.
    if not track_dict:
        print("没有检测到任何物体，跳过视频生成")
        return

    # Output runs at 25 fps with one object frame in every 5th slot (0, 5, 10...).
    target_fps = 25
    frame_interval = 5

    # 3. Render the tracks in parallel, at most one thread per CPU core.
    max_workers = min(os.cpu_count() or 4, len(track_dict))
    print(f"使用 {max_workers} 个线程并行处理")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_track = {
            executor.submit(
                process_track_id,
                track_id,
                frame_list,
                input_video_path,
                output_video_root,
                frame_width,
                frame_height,
                target_fps,
                frame_interval,
            ): track_id
            for track_id, frame_list in track_dict.items()
        }

        # Report each track as soon as it finishes.
        for future in as_completed(future_to_track):
            track_id = future_to_track[future]
            try:
                result = future.result()
                print(f"Track ID {track_id}: {result}")
            except Exception as e:
                print(f"Track ID {track_id} 处理失败: {str(e)}")

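# NOTE: legacy sequential (single-capture, no thread pool) version of
# frame_all_to_obj_vid, kept commented out for reference only; the active
# implementation above renders the tracks with a ThreadPoolExecutor.
# def frame_all_to_obj_vid(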
#     json_data: dict,
#     input_video_path: str,
#     output_video_root: str,
# ) -> None:
#     """
#     根据标注数据从原视频中截取物体，生成ai读取专用视频

#     参数:
#         json_data: 标注数据
#         input_video_path: 原视频路径
#         output_video_root: 输出视频根目录
#     """
#     # 确保输出目录存在
#     os.makedirs(output_video_root, exist_ok=True)

#     # 1. 从 json_data 中提取数据，按 track_id 组织
#     track_dict: dict[int, list[tuple[int, list[int]]]] = {}
#     # 遍历每一帧
#     for frame_id_str, detections in json_data.items():
#         frame_id = int(frame_id_str)
#         for det in detections:
#             track_id = det.get("track_id", -1)
#             xyxy = det.get("xyxy", [0, 0, 0, 0])
#             if track_id not in track_dict:
#                 track_dict[track_id] = []
#             track_dict[track_id].append((frame_id, xyxy))

#     # 2. 打开原视频
#     cap = cv2.VideoCapture(input_video_path)
#     if not cap.isOpened():
#         raise ValueError(f"无法打开视频: {input_video_path}")

#     # 获取原视频信息
#     original_fps = cap.get(cv2.CAP_PROP_FPS)
#     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
#     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

#     # 目标fps为25，每5帧取一帧（0, 5, 10...）
#     target_fps = 25
#     frame_interval = 5  # 每隔5帧取一帧

#     # 为每个 track_id 生成视频
#     for track_id, frame_list in track_dict.items():
#         # 计算需要生成的总帧数（确保覆盖所有物体帧且不少于两秒）
#         min_frames_for_2s = 25 * 2  # 2秒 @ 25fps
#         object_based_frames = len(frame_list) * frame_interval
#         max_output_frame = max(object_based_frames, min_frames_for_2s)

#         # 创建输出视频路径
#         output_path = os.path.join(output_video_root, f"{track_id}.mp4")

#         # 创建视频写入器
#         fourcc = cv2.VideoWriter_fourcc(*'mp4v') # type: ignore
#         out = cv2.VideoWriter(output_path, fourcc, target_fps, (frame_width, frame_height))

#         if not out.isOpened():
#             print(f"无法创建视频文件: {output_path}")
#             continue

#         # 生成视频帧
#         current_output_frame = 0
#         obj_frame_idx = 0

#         while current_output_frame < max_output_frame:
#             # 检查当前输出帧是否是5的倍数
#             if current_output_frame % frame_interval == 0 and obj_frame_idx < len(frame_list):
#                 # 这是需要放置物体帧的位置
#                 original_frame_id, xyxy = frame_list[obj_frame_idx]

#                 # 设置原视频读取位置
#                 cap.set(cv2.CAP_PROP_POS_FRAMES, original_frame_id)
#                 ret, frame = cap.read()

#                 if not ret:
#                     # 读取失败，使用黑色帧
#                     output_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
#                 else:
#                     # 有数据，截取对应区域
#                     x1, y1, x2, y2 = map(int, xyxy)
#                     # 确保坐标在有效范围内
#                     x1 = max(0, min(x1, frame_width))
#                     y1 = max(0, min(y1, frame_height))
#                     x2 = max(0, min(x2, frame_width))
#                     y2 = max(0, min(y2, frame_height))

#                     # 创建黑色背景
#                     output_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
#                     # 将截取的区域放到输出帧中（保持原位置）
#                     if x2 > x1 and y2 > y1:
#                         cropped = frame[y1:y2, x1:x2]
#                         output_frame[y1:y2, x1:x2] = cropped

#                 # 移到下一个物体帧
#                 obj_frame_idx += 1
#             else:
#                 # 剩余帧留黑
#                 output_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

#             # 写入帧
#             out.write(output_frame)
#             current_output_frame += 1

#         # 释放视频写入器
#         out.release()
#         print(f"已生成视频: {output_path}, 共 {current_output_frame} 帧")

#     # 释放原视频
#     cap.release()


def create_mian_vid_for_ai(
    input_video_path: str,
    output_folder: str
) -> str:
    """
    Build a copy of the video in which source frames 0, 1, 2, ... land on
    output frames 0, 5, 10, ... and every other output frame is black.

    The output is written to ``<output_folder>/mian_vid_ai.mp4`` at 25 fps
    with the ``mp4v`` codec. It is padded with black frames to last at least
    two seconds.

    Args:
        input_video_path: path of the source video.
        output_folder: directory receiving the output video (created if
            missing).

    Returns:
        str: path of the generated video.

    Raises:
        ValueError: if the source video cannot be opened or the output file
            cannot be created.
    """
    os.makedirs(output_folder, exist_ok=True)
    output_video_path = os.path.join(output_folder, "mian_vid_ai.mp4")

    cap = cv2.VideoCapture(input_video_path)
    out = None
    try:
        if not cap.isOpened():
            raise ValueError(f"无法打开视频: {input_video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Output runs at 25 fps with one source frame in every 5th slot.
        target_fps = 25
        frame_interval = 5

        # Enough slots for every source frame, and at least two seconds.
        min_frames_for_2s = 2 * target_fps
        max_output_frame = max(total_frames * frame_interval, min_frames_for_2s)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # type: ignore
        out = cv2.VideoWriter(output_video_path, fourcc, target_fps, (frame_width, frame_height))
        if not out.isOpened():
            raise ValueError(f"无法创建视频文件: {output_video_path}")

        # One shared black frame for all filler frames; it is only read,
        # never modified.
        black_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

        original_frame_idx = 0
        # Source frames are consumed strictly in order, so plain sequential
        # reads are enough. A seek is only needed to step past a frame that
        # failed to decode.
        needs_seek = False

        for current_output_frame in range(max_output_frame):
            output_frame = black_frame

            is_source_slot = current_output_frame % frame_interval == 0
            if is_source_slot and original_frame_idx < total_frames:
                if needs_seek:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, original_frame_idx)
                ret, frame = cap.read()

                # A failed read leaves the slot black.
                if ret:
                    output_frame = frame
                needs_seek = not ret
                original_frame_idx += 1

            out.write(output_frame)
    finally:
        # Always free the decoder and finalise the file, including on errors.
        cap.release()
        if out is not None:
            out.release()

    print(f"已生成视频: {output_video_path}, 共 {max_output_frame} 帧")

    return output_video_path