添加耗时记录

2026-04-30 17:55:44 +08:00 · 2026-04-30 17:55:44 +08:00 · cebfddd13b
parent 1e9906ef1a
commit cebfddd13b
1 changed files with 74 additions and 18 deletions
--- a/main.py
+++ b/main.py
@@ -135,7 +135,7 @@ def extract_wav(video_path: Path, temp_dir: Path) -> Optional[Path]:



-def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
+def process_batch_diarization(video_paths, max_workers=1):
    """
    第一阶段：批量执行说话人分离（主进程顺序处理）

@@ -144,7 +144,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
        max_workers: 并发数（目前固定为 1）

    Returns:
-        Dict[video_path -> diar_result_path]: 说话人分离结果映射
+        字典：video_path -> diar_result_path
    """
    print("=" * 60)
    print("第一阶段：批量说话人分离")
@@ -172,6 +172,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):

    # 顺序处理每个视频
    for i, video_path in enumerate(video_paths, 1):
+        video_start_time = time.time()
        try:
            print(f"\n[{i}/{len(video_paths)}] 处理：{video_path.name}")

@@ -179,6 +180,12 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
            wav_path = extract_wav(video_path, TEMP_DIR)
            if wav_path is None:
                print(f"  ✗ 音频提取失败")
+                results[video_path] = {
+                    "success": False,
+                    "diar_result": None,
+                    "error": "音频提取失败",
+                    "process_time": time.time() - video_start_time
+                }
                continue

            # 2. 执行说话人分离
@@ -186,6 +193,12 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):

            if not diar_segments:
                print(f"  ✗ 说话人分离结果为空")
+                results[video_path] = {
+                    "success": False,
+                    "diar_result": None,
+                    "error": "说话人分离结果为空",
+                    "process_time": time.time() - video_start_time
+                }
                continue

            # 3. 保存说话人分离结果（临时文件）
@@ -196,16 +209,29 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
            from map_speaker import save_json
            save_json(temp_diar_path, diar_result)

-            results[video_path] = str(temp_diar_path)
-            print(f"  ✓ 说话人分离完成")
+            video_process_time = time.time() - video_start_time
+            results[video_path] = {
+                "success": True,
+                "diar_result": str(temp_diar_path),
+                "error": None,
+                "process_time": video_process_time
+            }
+            print(f"  ✓ 说话人分离完成 (耗时：{video_process_time:.1f}s)")

            # 4. 清理临时 WAV（保留用于后续 ASR）
            # 注意：这里不删除，ASR 阶段还需要

        except Exception as e:
            import traceback
+            video_process_time = time.time() - video_start_time
            print(f"  ✗ 处理失败：{e}")
            traceback.print_exc()
+            results[video_path] = {
+                "success": False,
+                "diar_result": None,
+                "error": str(e),
+                "process_time": video_process_time
+            }

        # 显示进度
        elapsed = time.time() - start_time
@@ -222,7 +248,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
    return results


-def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers: int = 1):
+def process_batch_asr(video_paths, diar_results, max_workers=1):
    """
    第二阶段：批量执行 ASR 识别并合并结果（主进程顺序处理）

@@ -232,7 +258,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
        max_workers: 并发数（目前固定为 1）

    Returns:
-        List[Dict]: 最终结果列表
+        列表：最终结果列表
    """
    print("=" * 60)
    print("第二阶段：批量语音识别 + 合并结果")
@@ -255,13 +281,39 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:

    # 顺序处理每个视频
    for i, video_path in enumerate(video_paths, 1):
-        diar_path = diar_results.get(video_path)
-        if not diar_path:
+        video_start_time = time.time()
+        diar_info = diar_results.get(video_path)
+
+        # 检查第一阶段的结果（现在是字典结构）
+        if not diar_info:
            print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}（无说话人分离结果）")
            results.append({
                "video": str(video_path),
                "success": False,
-                "error": "无说话人分离结果"
+                "error": "无说话人分离结果",
+                "process_time": 0.0
+            })
+            continue
+
+        # 如果第一阶段失败，跳过该视频
+        if not diar_info.get("success"):
+            print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}（第一阶段失败：{diar_info.get('error')})")
+            results.append({
+                "video": str(video_path),
+                "success": False,
+                "error": f"说话人分离失败：{diar_info.get('error')}",
+                "process_time": diar_info.get("process_time", 0.0)
+            })
+            continue
+
+        diar_path = diar_info.get("diar_result")
+        if not diar_path:
+            print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}（无说话人分离结果文件）")
+            results.append({
+                "video": str(video_path),
+                "success": False,
+                "error": "说话人分离结果文件不存在",
+                "process_time": 0.0
            })
            continue

@@ -279,7 +331,8 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
                    results.append({
                        "video": str(video_path),
                        "success": False,
-                        "error": "音频提取失败"
+                        "error": "音频提取失败",
+                        "process_time": time.time() - video_start_time
                    })
                    continue

@@ -295,7 +348,8 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
                results.append({
                    "video": str(video_path),
                    "success": False,
-                    "error": "ASR 识别结果为空"
+                    "error": "ASR 识别结果为空",
+                    "process_time": time.time() - video_start_time
                })
                continue

@@ -328,7 +382,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:

            # 5. 保存最终结果
            output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"
-            # 确保 asr_sentences 是 List[Sentence] 类型
+            # 确保 asr_sentences 是列表类型
            if isinstance(asr_sentences, dict):
                # 如果是字典，尝试获取 sentences 键或转换为空列表
                asr_sentences = asr_sentences.get("sentences", [])
@@ -341,27 +395,31 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
                speaker = sentence.speaker
                speaker_counts[speaker] = speaker_counts.get(speaker, 0) + 1

+            video_process_time = time.time() - video_start_time
            results.append({
                "video": str(video_path),
                "success": True,
                "asr_result": [s.to_dict() for s in asr_sentences],
                "merged_result": str(output_file),
                "speaker_counts": speaker_counts,
-                "total_sentences": len(asr_sentences)
+                "total_sentences": len(asr_sentences),
+                "process_time": video_process_time
            })

-            print(f"  ✓ 处理完成")
+            print(f"  ✓ 处理完成 (耗时：{video_process_time:.1f}s)")
            print(f"    - 句子数：{len(asr_sentences)}")
            print(f"    - 说话人：{speaker_counts}")

        except Exception as e:
            import traceback
+            video_process_time = time.time() - video_start_time
            print(f"  ✗ 处理失败：{e}")
            traceback.print_exc()
            results.append({
                "video": str(video_path),
                "success": False,
-                "error": str(e)
+                "error": str(e),
+                "process_time": video_process_time
            })

        finally:
@@ -390,10 +448,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
    print(f"\n✓ 第二阶段完成，耗时：{total_time:.1f}s")
    print()

-    return results
-
    # 汇总报告
-    total_time = time.time() - start_time
    success_count = sum(1 for r in results if r["success"])

    print("\n" + "=" * 60)
@@ -417,6 +472,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
                "output": r.get("merged_result"),
                "total_sentences": r.get("total_sentences", 0),
                "speaker_counts": r.get("speaker_counts", {}),
+                "process_time_seconds": round(r.get("process_time", 0.0), 2),
                "error": r.get("error")
            }
            for r in results