# ai/SpeechRecognition/map_speaker.py
#
# 56 lines
# 1.9 KiB
# Python

import json
def load_json(filepath):
    """Read the UTF-8 text file at *filepath* and return its parsed JSON content."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def save_json(filepath, data):
    """Write *data* to *filepath* as UTF-8 JSON (2-space indent, non-ASCII kept as-is).

    The data is serialized to a string before the target file is opened.
    Opening with 'w' truncates the file immediately, so serializing first
    means a serialization error (e.g. ``TypeError`` for a non-JSON type)
    propagates without destroying the file's previous content.

    Args:
        filepath: Path of the file to create or overwrite.
        data: A JSON-serializable object.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)
def find_speaker(begin_time, end_time, diarization_segments,
                 default_speaker="SPEAKER_00"):
    """Return the speaker of the segment that overlaps a time span the most.

    Args:
        begin_time: Start of the span to label.
        end_time: End of the span to label (same time base as *begin_time*
            and as the segment times).
        diarization_segments: Iterable of mappings, each providing
            ``'begin_time'``, ``'end_time'`` and ``'speaker'``.
        default_speaker: Label returned when no segment has a strictly
            positive overlap with the span (including when there are no
            segments at all). Defaults to ``"SPEAKER_00"``.

    Returns:
        The ``'speaker'`` value of the segment with the largest overlap.
        When several segments tie, the earliest one in iteration order wins.
    """
    best_overlap = 0
    best_speaker = default_speaker
    for seg in diarization_segments:
        seg_begin = seg['begin_time']
        seg_end = seg['end_time']
        # Length of the intersection; zero or negative means the two
        # intervals merely touch or do not meet at all.
        overlap = min(end_time, seg_end) - max(begin_time, seg_begin)
        # best_overlap never drops below 0, so this single comparison also
        # rejects non-overlapping segments; strict '>' keeps the first
        # segment on ties.
        if overlap > best_overlap:
            best_overlap = overlap
            best_speaker = seg['speaker']
    return best_speaker
# Default input locations, used when main() is called without arguments.
_DEFAULT_DIARIZATION_PATH = r'd:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio\result.json'
_DEFAULT_TRANSCRIPTION_PATH = r'd:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio\output\VID_20251031_132320_019_mono_result.json'


def main(diarization_path=_DEFAULT_DIARIZATION_PATH,
         transcription_path=_DEFAULT_TRANSCRIPTION_PATH,
         output_path=None):
    """Assign a speaker to every transcribed sentence and print a summary.

    Loads the diarization JSON (reads its ``'segments'`` list) and the
    transcription JSON (reads its ``'sentences'`` list), sets each
    sentence's ``'speaker'`` to the result of ``find_speaker`` for the
    sentence's ``'begin_time'``/``'end_time'``, saves the updated
    transcription, and prints how many sentences each speaker received.

    Args:
        diarization_path: JSON file holding the diarization segments.
        transcription_path: JSON file holding the transcribed sentences.
        output_path: Where to write the updated transcription. ``None``
            (the default) overwrites *transcription_path* in place.
    """
    if output_path is None:
        output_path = transcription_path

    diarization = load_json(diarization_path)
    transcription = load_json(transcription_path)
    diarization_segments = diarization['segments']

    # Label each sentence and tally the labels in the same pass.
    speaker_counts = {}
    for sentence in transcription['sentences']:
        speaker = find_speaker(
            sentence['begin_time'], sentence['end_time'], diarization_segments
        )
        sentence['speaker'] = speaker
        speaker_counts[speaker] = speaker_counts.get(speaker, 0) + 1

    save_json(output_path, transcription)

    print("说话人统计:")
    for speaker, count in sorted(speaker_counts.items()):
        print(f" {speaker}: {count}")
# Run the speaker mapping only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()