# Source listing metadata: 56 lines, 1.9 KiB, Python
import json
|
|
|
|
def load_json(filepath):
    """Read a JSON document from *filepath* and return the parsed object.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # 'utf-8-sig' strips a leading byte-order mark when one is present and
    # otherwise decodes exactly like 'utf-8'; json.load rejects text that
    # still starts with a BOM.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        return json.load(f)
|
|
|
|
def save_json(filepath, data):
    """Write *data* to *filepath* as UTF-8 JSON with a 2-space indent.

    Non-ASCII characters are written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        filepath: Path of the file to create or overwrite.
        data: Object to serialize.

    Raises:
        TypeError: If *data* contains a value that json cannot serialize.
            The target file is not touched in that case.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening the file: open(..., 'w') truncates at once,
    # so a serialization error raised afterwards would leave a truncated
    # file behind.
    text = json.dumps(data, ensure_ascii=False, indent=2)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(text)
|
|
|
|
def find_speaker(begin_time, end_time, diarization_segments,
                 default_speaker="SPEAKER_00"):
    """Return the speaker whose segment overlaps [begin_time, end_time] most.

    Args:
        begin_time: Start of the interval to label.
        end_time: End of the interval to label.
        diarization_segments: Iterable of dicts, each providing
            'begin_time', 'end_time' and 'speaker' keys.
        default_speaker: Label returned when no segment has a strictly
            positive overlap with the interval (this includes an empty
            segment list and a zero-length interval).

    Returns:
        The 'speaker' value of the segment with the largest overlap. When
        several segments tie, the first one in iteration order wins.
    """
    best_speaker = default_speaker
    max_overlap = 0

    for seg in diarization_segments:
        overlap = min(end_time, seg['end_time']) - max(begin_time, seg['begin_time'])
        # max_overlap starts at 0, so the strict comparison both rejects
        # non-overlapping segments (overlap <= 0) and keeps the first
        # segment on ties.
        if overlap > max_overlap:
            max_overlap = overlap
            best_speaker = seg['speaker']

    return best_speaker
|
|
|
|
def main(
    diarization_path=r'd:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio\result.json',
    transcription_path=r'd:\Userfile\Projects\AnzezxianxHazardInspectAI\Code\audio\output\VID_20251031_132320_019_mono_result.json',
):
    """Label every transcription sentence with a speaker and print a tally.

    Loads the diarization result and the transcription, sets each
    sentence's 'speaker' to the value chosen by ``find_speaker``, writes
    the transcription back to the file it was read from, and prints how
    many sentences each speaker received.

    Args:
        diarization_path: JSON file whose top-level 'segments' list is
            passed to ``find_speaker``.
        transcription_path: JSON file with a top-level 'sentences' list;
            each sentence provides 'begin_time' and 'end_time'. This file
            is overwritten in place with the updated data.
    """
    from collections import Counter

    diarization = load_json(diarization_path)
    transcription = load_json(transcription_path)

    diarization_segments = diarization['segments']
    sentences = transcription['sentences']

    for sentence in sentences:
        sentence['speaker'] = find_speaker(
            sentence['begin_time'], sentence['end_time'], diarization_segments
        )

    # Read and write use the same path variable so they cannot drift apart.
    save_json(transcription_path, transcription)

    speaker_counts = Counter(sentence['speaker'] for sentence in sentences)

    # Printed text is Chinese: header "Speaker statistics:", unit "sentences".
    print("说话人统计:")
    for speaker, count in sorted(speaker_counts.items()):
        print(f" {speaker}: {count} 句")
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|