SpeechRecognition/app/asr/map_speaker.py

28 lines
847 B
Python

import json
def load_json(filepath):
    """Read a UTF-8 encoded JSON file and return the parsed content.

    Args:
        filepath: Path of the JSON file to read; passed straight to ``open``.

    Returns:
        Whatever ``json.load`` produces for the document (dict, list, str,
        number, bool or ``None``).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # The encoding is pinned so the result does not depend on the platform's
    # default locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)
def save_json(filepath, data):
    """Serialize *data* as pretty-printed JSON into a UTF-8 encoded file.

    The target file is created, or truncated if it already exists.

    Args:
        filepath: Path of the file to write; passed straight to ``open``.
        data: JSON-serializable object to store.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *data* contains an object ``json.dump`` cannot serialize.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file
        # instead of escaping it as \uXXXX sequences.
        json.dump(data, f, ensure_ascii=False, indent=2)
def find_speaker(begin_time, end_time, diarization_segments,
                 default_speaker="SPEAKER_00"):
    """Pick the speaker of the diarization segment that best covers an interval.

    Every segment is compared individually with ``[begin_time, end_time]``;
    overlaps of several segments belonging to the same speaker are not
    summed. The segment with the longest strictly positive overlap wins,
    and on a tie the segment that appears first in the input is kept.

    Args:
        begin_time: Start of the interval to label.
        end_time: End of the interval to label.
        diarization_segments: Iterable of mappings with the keys
            ``'begin_time'``, ``'end_time'`` and ``'speaker'``. Times are
            assumed to use the same unit as *begin_time* / *end_time*.
        default_speaker: Label returned when no segment overlaps the
            interval (including when *diarization_segments* is empty).

    Returns:
        The ``'speaker'`` value of the best-overlapping segment, or
        *default_speaker* if nothing overlaps.

    Raises:
        KeyError: If a segment lacks ``'begin_time'`` or ``'end_time'``, or
            if a winning segment lacks ``'speaker'``.
    """
    best_speaker = default_speaker
    max_overlap = 0
    for seg in diarization_segments:
        overlap_begin = max(begin_time, seg['begin_time'])
        overlap_end = min(end_time, seg['end_time'])
        overlap_duration = overlap_end - overlap_begin
        # max_overlap never drops below 0, so the strict ">" both rejects
        # disjoint or merely touching segments (duration <= 0) and keeps
        # the earliest segment when two overlaps are equal.
        if overlap_duration > max_overlap:
            max_overlap = overlap_duration
            best_speaker = seg['speaker']
    return best_speaker