优化输出结果
This commit is contained in:
parent
07c1eca03b
commit
442afff195
|
|
@ -183,7 +183,7 @@ class ASRService:
|
||||||
asr_begin = asr_seg["begin_time"]
|
asr_begin = asr_seg["begin_time"]
|
||||||
asr_end = asr_seg["end_time"]
|
asr_end = asr_seg["end_time"]
|
||||||
|
|
||||||
best_speaker = "SPEAKER_00"
|
best_speaker = "speaker_0"
|
||||||
best_overlap = 0.0
|
best_overlap = 0.0
|
||||||
|
|
||||||
for dia_seg in diarization_segments:
|
for dia_seg in diarization_segments:
|
||||||
|
|
|
||||||
35
main.py
35
main.py
|
|
@ -282,10 +282,10 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 2. 加载说话人分离结果
|
# 2. 加载说话人分离结果
|
||||||
from map_speaker import find_speaker, load_json
|
from map_speaker import load_json
|
||||||
diar_result = load_json(diar_path)
|
diar_result = load_json(diar_path)
|
||||||
|
|
||||||
# 3. 执行 ASR 识别
|
# 3. 执行 ASR 识别(不使用 ASR 自带的说话人)
|
||||||
asr_sentences = asr_service.recognize(wav_path)
|
asr_sentences = asr_service.recognize(wav_path)
|
||||||
|
|
||||||
if not asr_sentences:
|
if not asr_sentences:
|
||||||
|
|
@ -297,15 +297,32 @@ def process_batch_asr(video_paths: List[Path], diar_results: Dict, max_workers:
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 4. 合并说话人信息
|
# 4. 合并说话人信息(只使用 3D-Speaker 结果)
|
||||||
print(f" 合并结果...")
|
print(f" 合并结果...")
|
||||||
for sentence in asr_sentences:
|
for sentence in asr_sentences:
|
||||||
new_speaker = find_speaker(
|
# 查找该时间段对应的说话人
|
||||||
sentence.begin_time,
|
matched_speaker = None
|
||||||
sentence.end_time,
|
best_overlap = 0.0
|
||||||
diar_result["segments"]
|
|
||||||
)
|
for seg in diar_result["segments"]:
|
||||||
sentence.speaker = new_speaker
|
seg_begin = seg['begin_time']
|
||||||
|
seg_end = seg['end_time']
|
||||||
|
|
||||||
|
# 计算重叠时间
|
||||||
|
overlap_begin = max(sentence.begin_time, seg_begin)
|
||||||
|
overlap_end = min(sentence.end_time, seg_end)
|
||||||
|
|
||||||
|
if overlap_begin < overlap_end:
|
||||||
|
overlap_duration = overlap_end - overlap_begin
|
||||||
|
if overlap_duration > best_overlap:
|
||||||
|
best_overlap = overlap_duration
|
||||||
|
matched_speaker = seg['speaker']
|
||||||
|
|
||||||
|
# 如果有匹配,使用匹配的说话人;否则使用 speaker_0
|
||||||
|
if matched_speaker:
|
||||||
|
sentence.speaker = matched_speaker
|
||||||
|
else:
|
||||||
|
sentence.speaker = "speaker_0"
|
||||||
|
|
||||||
# 5. 保存最终结果
|
# 5. 保存最终结果
|
||||||
output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"
|
output_file = OUTPUT_DIR / f"{video_path.stem}_result.json"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue