优化输出结果:合并说话人信息时只使用 3D-Speaker 的说话人分离结果(按最大重叠时长匹配),无匹配时默认说话人为 speaker_0

This commit is contained in:
yueliuli 2026-04-30 16:46:37 +08:00
parent 07c1eca03b
commit 442afff195
2 changed files with 27 additions and 10 deletions

View File

@ -183,7 +183,7 @@ class ASRService:
asr_begin = asr_seg["begin_time"] asr_begin = asr_seg["begin_time"]
asr_end = asr_seg["end_time"] asr_end = asr_seg["end_time"]
best_speaker = "SPEAKER_00" best_speaker = "speaker_0"
best_overlap = 0.0 best_overlap = 0.0
for dia_seg in diarization_segments: for dia_seg in diarization_segments:

35
main.py
View File

@ -282,10 +282,10 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
continue continue
# 2. 加载说话人分离结果 # 2. 加载说话人分离结果
from map_speaker import find_speaker, load_json from map_speaker import load_json
diar_result = load_json(diar_path) diar_result = load_json(diar_path)
# 3. 执行 ASR 识别 # 3. 执行 ASR 识别(不使用 ASR 自带的说话人)
asr_sentences = asr_service.recognize(wav_path) asr_sentences = asr_service.recognize(wav_path)
if not asr_sentences: if not asr_sentences:
@ -297,15 +297,32 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
}) })
continue continue
# 4. 合并说话人信息 # 4. 合并说话人信息(只使用 3D-Speaker 结果)
print(f" 合并结果...") print(f" 合并结果...")
for sentence in asr_sentences: for sentence in asr_sentences:
new_speaker = find_speaker( # 查找该时间段对应的说话人
sentence.begin_time, matched_speaker = None
sentence.end_time, best_overlap = 0.0
diar_result["segments"]
) for seg in diar_result["segments"]:
sentence.speaker = new_speaker seg_begin = seg['begin_time']
seg_end = seg['end_time']
# 计算重叠时间
overlap_begin = max(sentence.begin_time, seg_begin)
overlap_end = min(sentence.end_time, seg_end)
if overlap_begin < overlap_end:
overlap_duration = overlap_end - overlap_begin
if overlap_duration > best_overlap:
best_overlap = overlap_duration
matched_speaker = seg['speaker']
# 如果有匹配,使用匹配的说话人;否则使用 speaker_0
if matched_speaker:
sentence.speaker = matched_speaker
else:
sentence.speaker = "speaker_0"
# 5. 保存最终结果 # 5. 保存最终结果
output_file = OUTPUT_DIR / f"{video_path.stem}_result.json" output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"