"""Assign diarization speaker labels to transcribed sentences.

Each sentence in the transcription JSON is given the speaker of the
diarization segment that overlaps it the most in time, the transcription
file is rewritten, and a per-speaker sentence count is printed.
"""

import json
from collections import Counter
from typing import Any, Iterable, Mapping, Optional

# Default locations used when main() is called without arguments.
DEFAULT_DIARIZATION_PATH = r'd:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio\result.json'
DEFAULT_TRANSCRIPTION_PATH = r'd:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio\output\VID_20251031_132320_019_mono_result.json'


def load_json(filepath: str) -> Any:
    """Read the UTF-8 encoded JSON file at *filepath* and return its content."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_json(filepath: str, data: Any) -> None:
    """Write *data* to *filepath* as UTF-8 JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and the
    output is indented by two spaces.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


def find_speaker(
    begin_time: float,
    end_time: float,
    diarization_segments: Iterable[Mapping[str, Any]],
    default_speaker: str = "SPEAKER_00",
) -> str:
    """Return the speaker whose segment overlaps ``[begin_time, end_time]`` most.

    Args:
        begin_time: Start of the interval to label.
        end_time: End of the interval to label. Must be expressed in the same
            unit as the segment times.
        diarization_segments: Mappings with ``'begin_time'``, ``'end_time'``
            and ``'speaker'`` keys.
        default_speaker: Label returned when no segment has a strictly
            positive overlap with the interval.

    Returns:
        The ``'speaker'`` value of the segment with the largest overlap. On a
        tie the earliest such segment in iteration order wins.
    """
    max_overlap = 0
    best_speaker = default_speaker

    for seg in diarization_segments:
        overlap_begin = max(begin_time, seg['begin_time'])
        overlap_end = min(end_time, seg['end_time'])
        overlap_duration = overlap_end - overlap_begin

        # A non-positive duration means the intervals do not overlap; the
        # strict ">" keeps the first segment when several tie for the maximum.
        if overlap_duration > max_overlap:
            max_overlap = overlap_duration
            best_speaker = seg['speaker']

    return best_speaker


def main(
    diarization_path: str = DEFAULT_DIARIZATION_PATH,
    transcription_path: str = DEFAULT_TRANSCRIPTION_PATH,
    output_path: Optional[str] = None,
) -> None:
    """Relabel every transcribed sentence and print per-speaker counts.

    Args:
        diarization_path: JSON file holding a ``'segments'`` list.
        transcription_path: JSON file holding a ``'sentences'`` list; each
            sentence provides ``'begin_time'`` and ``'end_time'``.
        output_path: Where the updated transcription is written. When
            ``None`` the transcription file is overwritten in place.
    """
    if output_path is None:
        output_path = transcription_path

    diarization = load_json(diarization_path)
    transcription = load_json(transcription_path)

    diarization_segments = diarization['segments']
    sentences = transcription['sentences']

    for sentence in sentences:
        sentence['speaker'] = find_speaker(
            sentence['begin_time'],
            sentence['end_time'],
            diarization_segments,
        )

    save_json(output_path, transcription)

    speaker_counts = Counter(sentence['speaker'] for sentence in sentences)

    print("说话人统计:")
    for speaker, count in sorted(speaker_counts.items()):
        print(f" {speaker}: {count} 句")


if __name__ == '__main__':
    main()