添加耗时记录
This commit is contained in:
parent
1e9906ef1a
commit
cebfddd13b
92
main.py
92
main.py
|
|
@@ -135,7 +135,7 @@ def extract_wav(video_path: Path, temp_dir: Path) -> Optional[Path]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
def process_batch_diarization(video_paths, max_workers=1):
|
||||||
"""
|
"""
|
||||||
第一阶段:批量执行说话人分离(主进程顺序处理)
|
第一阶段:批量执行说话人分离(主进程顺序处理)
|
||||||
|
|
||||||
|
|
@@ -144,7 +144,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
||||||
max_workers: 并发数(目前固定为 1)
|
max_workers: 并发数(目前固定为 1)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict[video_path -> diar_result_path]: 说话人分离结果映射
|
字典:video_path -> diar_result_path
|
||||||
"""
|
"""
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
print("第一阶段:批量说话人分离")
|
print("第一阶段:批量说话人分离")
|
||||||
|
|
@@ -172,6 +172,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
||||||
|
|
||||||
# 顺序处理每个视频
|
# 顺序处理每个视频
|
||||||
for i, video_path in enumerate(video_paths, 1):
|
for i, video_path in enumerate(video_paths, 1):
|
||||||
|
video_start_time = time.time()
|
||||||
try:
|
try:
|
||||||
print(f"\n[{i}/{len(video_paths)}] 处理:{video_path.name}")
|
print(f"\n[{i}/{len(video_paths)}] 处理:{video_path.name}")
|
||||||
|
|
||||||
|
|
@@ -179,6 +180,12 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
||||||
wav_path = extract_wav(video_path, TEMP_DIR)
|
wav_path = extract_wav(video_path, TEMP_DIR)
|
||||||
if wav_path is None:
|
if wav_path is None:
|
||||||
print(f" ✗ 音频提取失败")
|
print(f" ✗ 音频提取失败")
|
||||||
|
results[video_path] = {
|
||||||
|
"success": False,
|
||||||
|
"diar_result": None,
|
||||||
|
"error": "音频提取失败",
|
||||||
|
"process_time": time.time() - video_start_time
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 2. 执行说话人分离
|
# 2. 执行说话人分离
|
||||||
|
|
@@ -186,6 +193,12 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
||||||
|
|
||||||
if not diar_segments:
|
if not diar_segments:
|
||||||
print(f" ✗ 说话人分离结果为空")
|
print(f" ✗ 说话人分离结果为空")
|
||||||
|
results[video_path] = {
|
||||||
|
"success": False,
|
||||||
|
"diar_result": None,
|
||||||
|
"error": "说话人分离结果为空",
|
||||||
|
"process_time": time.time() - video_start_time
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 3. 保存说话人分离结果(临时文件)
|
# 3. 保存说话人分离结果(临时文件)
|
||||||
|
|
@@ -196,16 +209,29 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
||||||
from map_speaker import save_json
|
from map_speaker import save_json
|
||||||
save_json(temp_diar_path, diar_result)
|
save_json(temp_diar_path, diar_result)
|
||||||
|
|
||||||
results[video_path] = str(temp_diar_path)
|
video_process_time = time.time() - video_start_time
|
||||||
print(f" ✓ 说话人分离完成")
|
results[video_path] = {
|
||||||
|
"success": True,
|
||||||
|
"diar_result": str(temp_diar_path),
|
||||||
|
"error": None,
|
||||||
|
"process_time": video_process_time
|
||||||
|
}
|
||||||
|
print(f" ✓ 说话人分离完成 (耗时:{video_process_time:.1f}s)")
|
||||||
|
|
||||||
# 4. 清理临时 WAV(保留用于后续 ASR)
|
# 4. 清理临时 WAV(保留用于后续 ASR)
|
||||||
# 注意:这里不删除,ASR 阶段还需要
|
# 注意:这里不删除,ASR 阶段还需要
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import traceback
|
import traceback
|
||||||
|
video_process_time = time.time() - video_start_time
|
||||||
print(f" ✗ 处理失败:{e}")
|
print(f" ✗ 处理失败:{e}")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
results[video_path] = {
|
||||||
|
"success": False,
|
||||||
|
"diar_result": None,
|
||||||
|
"error": str(e),
|
||||||
|
"process_time": video_process_time
|
||||||
|
}
|
||||||
|
|
||||||
# 显示进度
|
# 显示进度
|
||||||
elapsed = time.time() - start_time
|
elapsed = time.time() - start_time
|
||||||
|
|
@@ -222,7 +248,7 @@ def process_batch_diarization(video_paths: List[Path], max_workers: int = 1):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers: int = 1):
|
def process_batch_asr(video_paths, diar_results, max_workers=1):
|
||||||
"""
|
"""
|
||||||
第二阶段:批量执行 ASR 识别并合并结果(主进程顺序处理)
|
第二阶段:批量执行 ASR 识别并合并结果(主进程顺序处理)
|
||||||
|
|
||||||
|
|
@@ -232,7 +258,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
max_workers: 并发数(目前固定为 1)
|
max_workers: 并发数(目前固定为 1)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Dict]: 最终结果列表
|
列表:最终结果列表
|
||||||
"""
|
"""
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
print("第二阶段:批量语音识别 + 合并结果")
|
print("第二阶段:批量语音识别 + 合并结果")
|
||||||
|
|
@@ -255,13 +281,39 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
|
|
||||||
# 顺序处理每个视频
|
# 顺序处理每个视频
|
||||||
for i, video_path in enumerate(video_paths, 1):
|
for i, video_path in enumerate(video_paths, 1):
|
||||||
diar_path = diar_results.get(video_path)
|
video_start_time = time.time()
|
||||||
if not diar_path:
|
diar_info = diar_results.get(video_path)
|
||||||
|
|
||||||
|
# 检查第一阶段的结果(现在是字典结构)
|
||||||
|
if not diar_info:
|
||||||
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(无说话人分离结果)")
|
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(无说话人分离结果)")
|
||||||
results.append({
|
results.append({
|
||||||
"video": str(video_path),
|
"video": str(video_path),
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": "无说话人分离结果"
|
"error": "无说话人分离结果",
|
||||||
|
"process_time": 0.0
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 如果第一阶段失败,跳过该视频
|
||||||
|
if not diar_info.get("success"):
|
||||||
|
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(第一阶段失败:{diar_info.get('error')})")
|
||||||
|
results.append({
|
||||||
|
"video": str(video_path),
|
||||||
|
"success": False,
|
||||||
|
"error": f"说话人分离失败:{diar_info.get('error')}",
|
||||||
|
"process_time": diar_info.get("process_time", 0.0)
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
diar_path = diar_info.get("diar_result")
|
||||||
|
if not diar_path:
|
||||||
|
print(f"\n[{i}/{len(video_paths)}] 跳过 {video_path.name}(无说话人分离结果文件)")
|
||||||
|
results.append({
|
||||||
|
"video": str(video_path),
|
||||||
|
"success": False,
|
||||||
|
"error": "说话人分离结果文件不存在",
|
||||||
|
"process_time": 0.0
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@@ -279,7 +331,8 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
results.append({
|
results.append({
|
||||||
"video": str(video_path),
|
"video": str(video_path),
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": "音频提取失败"
|
"error": "音频提取失败",
|
||||||
|
"process_time": time.time() - video_start_time
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@@ -295,7 +348,8 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
results.append({
|
results.append({
|
||||||
"video": str(video_path),
|
"video": str(video_path),
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": "ASR 识别结果为空"
|
"error": "ASR 识别结果为空",
|
||||||
|
"process_time": time.time() - video_start_time
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@@ -328,7 +382,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
|
|
||||||
# 5. 保存最终结果
|
# 5. 保存最终结果
|
||||||
output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"
|
output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"
|
||||||
# 确保 asr_sentences 是 List[Sentence] 类型
|
# 确保 asr_sentences 是列表类型
|
||||||
if isinstance(asr_sentences, dict):
|
if isinstance(asr_sentences, dict):
|
||||||
# 如果是字典,尝试获取 sentences 键或转换为空列表
|
# 如果是字典,尝试获取 sentences 键或转换为空列表
|
||||||
asr_sentences = asr_sentences.get("sentences", [])
|
asr_sentences = asr_sentences.get("sentences", [])
|
||||||
|
|
@@ -341,27 +395,31 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
speaker = sentence.speaker
|
speaker = sentence.speaker
|
||||||
speaker_counts[speaker] = speaker_counts.get(speaker, 0) + 1
|
speaker_counts[speaker] = speaker_counts.get(speaker, 0) + 1
|
||||||
|
|
||||||
|
video_process_time = time.time() - video_start_time
|
||||||
results.append({
|
results.append({
|
||||||
"video": str(video_path),
|
"video": str(video_path),
|
||||||
"success": True,
|
"success": True,
|
||||||
"asr_result": [s.to_dict() for s in asr_sentences],
|
"asr_result": [s.to_dict() for s in asr_sentences],
|
||||||
"merged_result": str(output_file),
|
"merged_result": str(output_file),
|
||||||
"speaker_counts": speaker_counts,
|
"speaker_counts": speaker_counts,
|
||||||
"total_sentences": len(asr_sentences)
|
"total_sentences": len(asr_sentences),
|
||||||
|
"process_time": video_process_time
|
||||||
})
|
})
|
||||||
|
|
||||||
print(f" ✓ 处理完成")
|
print(f" ✓ 处理完成 (耗时:{video_process_time:.1f}s)")
|
||||||
print(f" - 句子数:{len(asr_sentences)}")
|
print(f" - 句子数:{len(asr_sentences)}")
|
||||||
print(f" - 说话人:{speaker_counts}")
|
print(f" - 说话人:{speaker_counts}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import traceback
|
import traceback
|
||||||
|
video_process_time = time.time() - video_start_time
|
||||||
print(f" ✗ 处理失败:{e}")
|
print(f" ✗ 处理失败:{e}")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
results.append({
|
results.append({
|
||||||
"video": str(video_path),
|
"video": str(video_path),
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": str(e)
|
"error": str(e),
|
||||||
|
"process_time": video_process_time
|
||||||
})
|
})
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
|
@@ -390,10 +448,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
print(f"\n✓ 第二阶段完成,耗时:{total_time:.1f}s")
|
print(f"\n✓ 第二阶段完成,耗时:{total_time:.1f}s")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
# 汇总报告
|
# 汇总报告
|
||||||
total_time = time.time() - start_time
|
|
||||||
success_count = sum(1 for r in results if r["success"])
|
success_count = sum(1 for r in results if r["success"])
|
||||||
|
|
||||||
print("\n" + "=" * 60)
|
print("\n" + "=" * 60)
|
||||||
|
|
@@ -417,6 +472,7 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
"output": r.get("merged_result"),
|
"output": r.get("merged_result"),
|
||||||
"total_sentences": r.get("total_sentences", 0),
|
"total_sentences": r.get("total_sentences", 0),
|
||||||
"speaker_counts": r.get("speaker_counts", {}),
|
"speaker_counts": r.get("speaker_counts", {}),
|
||||||
|
"process_time_seconds": round(r.get("process_time", 0.0), 2),
|
||||||
"error": r.get("error")
|
"error": r.get("error")
|
||||||
}
|
}
|
||||||
for r in results
|
for r in results
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue