添加耗时记录

This commit is contained in:
yueliuli 2026-04-30 17:55:44 +08:00
parent 1e9906ef1a
commit cebfddd13b
1 changed file with 74 additions and 18 deletions

92
main.py
View File

@ -135,7 +135,7 @@ def extract_wav(video_path: Path, temp_dir: Path) -> Optional[Path]:
def process_batch_diarization(video_paths: List[Path], max_workers: int = 1): def process_batch_diarization(video_paths, max_workers=1):
""" """
第一阶段批量执行说话人分离主进程顺序处理 第一阶段批量执行说话人分离主进程顺序处理
@ -144,7 +144,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
max_workers: 并发数目前固定为 1 max_workers: 并发数目前固定为 1
Returns: Returns:
Dict[video_path -> diar_result_path]: 说话人分离结果映射 字典video_path -> diar_result_path
""" """
print("=" * 60) print("=" * 60)
print("第一阶段:批量说话人分离") print("第一阶段:批量说话人分离")
@ -172,6 +172,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
# 顺序处理每个视频 # 顺序处理每个视频
for i, video_path in enumerate(video_paths, 1): for i, video_path in enumerate(video_paths, 1):
video_start_time = time.time()
try: try:
print(f"\n[{i}/{len(video_paths)}] 处理:{video_path.name}") print(f"\n[{i}/{len(video_paths)}] 处理:{video_path.name}")
@ -179,6 +180,12 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
wav_path = extract_wav(video_path, TEMP_DIR) wav_path = extract_wav(video_path, TEMP_DIR)
if wav_path is None: if wav_path is None:
print(f" ✗ 音频提取失败") print(f" ✗ 音频提取失败")
results[video_path] = {
"success": False,
"diar_result": None,
"error": "音频提取失败",
"process_time": time.time() - video_start_time
}
continue continue
# 2. 执行说话人分离 # 2. 执行说话人分离
@ -186,6 +193,12 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
if not diar_segments: if not diar_segments:
print(f" ✗ 说话人分离结果为空") print(f" ✗ 说话人分离结果为空")
results[video_path] = {
"success": False,
"diar_result": None,
"error": "说话人分离结果为空",
"process_time": time.time() - video_start_time
}
continue continue
# 3. 保存说话人分离结果(临时文件) # 3. 保存说话人分离结果(临时文件)
@ -196,16 +209,29 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
from map_speaker import save_json from map_speaker import save_json
save_json(temp_diar_path, diar_result) save_json(temp_diar_path, diar_result)
results[video_path] = str(temp_diar_path) video_process_time = time.time() - video_start_time
print(f" ✓ 说话人分离完成") results[video_path] = {
"success": True,
"diar_result": str(temp_diar_path),
"error": None,
"process_time": video_process_time
}
print(f" ✓ 说话人分离完成 (耗时:{video_process_time:.1f}s)")
# 4. 清理临时 WAV保留用于后续 ASR # 4. 清理临时 WAV保留用于后续 ASR
# 注意这里不删除ASR 阶段还需要 # 注意这里不删除ASR 阶段还需要
except Exception as e: except Exception as e:
import traceback import traceback
video_process_time = time.time() - video_start_time
print(f" ✗ 处理失败:{e}") print(f" ✗ 处理失败:{e}")
traceback.print_exc() traceback.print_exc()
results[video_path] = {
"success": False,
"diar_result": None,
"error": str(e),
"process_time": video_process_time
}
# 显示进度 # 显示进度
elapsed = time.time() - start_time elapsed = time.time() - start_time
@ -222,7 +248,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
return results return results
def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers: int = 1): def process_batch_asr(video_paths, diar_results, max_workers=1):
""" """
第二阶段批量执行 ASR 识别并合并结果主进程顺序处理 第二阶段批量执行 ASR 识别并合并结果主进程顺序处理
@ -232,7 +258,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
max_workers: 并发数目前固定为 1 max_workers: 并发数目前固定为 1
Returns: Returns:
List[Dict]: 最终结果列表 列表最终结果列表
""" """
print("=" * 60) print("=" * 60)
print("第二阶段:批量语音识别 + 合并结果") print("第二阶段:批量语音识别 + 合并结果")
@ -255,13 +281,39 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
# 顺序处理每个视频 # 顺序处理每个视频
for i, video_path in enumerate(video_paths, 1): for i, video_path in enumerate(video_paths, 1):
diar_path = diar_results.get(video_path) video_start_time = time.time()
if not diar_path: diar_info = diar_results.get(video_path)
# 检查第一阶段的结果(现在是字典结构)
if not diar_info:
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(无说话人分离结果)") print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(无说话人分离结果)")
results.append({ results.append({
"video": str(video_path), "video": str(video_path),
"success": False, "success": False,
"error": "无说话人分离结果" "error": "无说话人分离结果",
"process_time": 0.0
})
continue
# 如果第一阶段失败,跳过该视频
if not diar_info.get("success"):
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(第一阶段失败:{diar_info.get('error')})")
results.append({
"video": str(video_path),
"success": False,
"error": f"说话人分离失败:{diar_info.get('error')}",
"process_time": diar_info.get("process_time", 0.0)
})
continue
diar_path = diar_info.get("diar_result")
if not diar_path:
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(无说话人分离结果文件)")
results.append({
"video": str(video_path),
"success": False,
"error": "说话人分离结果文件不存在",
"process_time": 0.0
}) })
continue continue
@ -279,7 +331,8 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
results.append({ results.append({
"video": str(video_path), "video": str(video_path),
"success": False, "success": False,
"error": "音频提取失败" "error": "音频提取失败",
"process_time": time.time() - video_start_time
}) })
continue continue
@ -295,7 +348,8 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
results.append({ results.append({
"video": str(video_path), "video": str(video_path),
"success": False, "success": False,
"error": "ASR 识别结果为空" "error": "ASR 识别结果为空",
"process_time": time.time() - video_start_time
}) })
continue continue
@ -328,7 +382,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
# 5. 保存最终结果 # 5. 保存最终结果
output_file = OUTPUT_DIR / f"{video_path.stem}_result.json" output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"
# 确保 asr_sentences 是 List[Sentence] 类型 # 确保 asr_sentences 是列表类型
if isinstance(asr_sentences, dict): if isinstance(asr_sentences, dict):
# 如果是字典,尝试获取 sentences 键或转换为空列表 # 如果是字典,尝试获取 sentences 键或转换为空列表
asr_sentences = asr_sentences.get("sentences", []) asr_sentences = asr_sentences.get("sentences", [])
@ -341,27 +395,31 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
speaker = sentence.speaker speaker = sentence.speaker
speaker_counts[speaker] = speaker_counts.get(speaker, 0) + 1 speaker_counts[speaker] = speaker_counts.get(speaker, 0) + 1
video_process_time = time.time() - video_start_time
results.append({ results.append({
"video": str(video_path), "video": str(video_path),
"success": True, "success": True,
"asr_result": [s.to_dict() for s in asr_sentences], "asr_result": [s.to_dict() for s in asr_sentences],
"merged_result": str(output_file), "merged_result": str(output_file),
"speaker_counts": speaker_counts, "speaker_counts": speaker_counts,
"total_sentences": len(asr_sentences) "total_sentences": len(asr_sentences),
"process_time": video_process_time
}) })
print(f" ✓ 处理完成") print(f" ✓ 处理完成 (耗时:{video_process_time:.1f}s)")
print(f" - 句子数:{len(asr_sentences)}") print(f" - 句子数:{len(asr_sentences)}")
print(f" - 说话人:{speaker_counts}") print(f" - 说话人:{speaker_counts}")
except Exception as e: except Exception as e:
import traceback import traceback
video_process_time = time.time() - video_start_time
print(f" ✗ 处理失败:{e}") print(f" ✗ 处理失败:{e}")
traceback.print_exc() traceback.print_exc()
results.append({ results.append({
"video": str(video_path), "video": str(video_path),
"success": False, "success": False,
"error": str(e) "error": str(e),
"process_time": video_process_time
}) })
finally: finally:
@ -390,10 +448,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
print(f"\n✓ 第二阶段完成,耗时:{total_time:.1f}s") print(f"\n✓ 第二阶段完成,耗时:{total_time:.1f}s")
print() print()
return results
# 汇总报告 # 汇总报告
total_time = time.time() - start_time
success_count = sum(1 for r in results if r["success"]) success_count = sum(1 for r in results if r["success"])
print("\n" + "=" * 60) print("\n" + "=" * 60)
@ -417,6 +472,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
"output": r.get("merged_result"), "output": r.get("merged_result"),
"total_sentences": r.get("total_sentences", 0), "total_sentences": r.get("total_sentences", 0),
"speaker_counts": r.get("speaker_counts", {}), "speaker_counts": r.get("speaker_counts", {}),
"process_time_seconds": round(r.get("process_time", 0.0), 2),
"error": r.get("error") "error": r.get("error")
} }
for r in results for r in results