SpeechRecognition/test/real/check_e2e.py

164 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
End-to-end test diagnostic script.

Quickly checks the test environment and its dependencies.
"""
import sys
import os
from pathlib import Path
# Resolve __file__ *before* walking up the tree. With a relative __file__
# (possible when a script is run directly on Python < 3.9) the parent chain
# bottoms out at "." and the computed root would silently become the current
# working directory instead of the directory three levels above this file.
project_root = Path(__file__).resolve().parent.parent.parent
# Run the rest of the script from the project root.
os.chdir(project_root)
# Report banner.
banner_rule = "=" * 60
print(banner_rule)
print("端到端测试环境诊断")
print(banner_rule)
# 1. Check the Python version and which interpreter is running the script.
python_version, python_path = sys.version, sys.executable
print(f"\n[OK] Python 版本:{python_version}")
print(f" 路径:{python_path}")
# 2. Check the test configuration file
print("\n" + "-" * 60)
print("测试配置文件检查")
print("-" * 60)
config_file = Path(__file__).parent / "test_config.yaml"
if config_file.exists():
    print(f"[OK] 配置文件存在:{config_file}")
    try:
        import yaml
    except ImportError:
        # A missing PyYAML must not abort the whole diagnosis: report it and
        # let the remaining checks run.
        yaml = None
        print("[X] 无法解析配置文件:未安装 pyyaml")
    if yaml is not None:
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
            test_files_cfg = config['test_files']
            primary_video = test_files_cfg['primary_video']
            # A key that is present without a value loads as None; treat it
            # as an empty list so len() and iteration below still work.
            backup_videos = test_files_cfg['backup_videos'] or []
            audio_paths = test_files_cfg['audio_files'] or []
        except (OSError, yaml.YAMLError, KeyError, TypeError) as e:
            # Unreadable file, invalid YAML, or a missing / ill-typed section.
            print(f"[X] 配置文件解析失败:{e!r}")
        else:
            print(f"[OK] 主测试视频:{primary_video}")
            print(f"[OK] 备用视频:{len(backup_videos)}")
            print(f"[OK] 音频文件:{len(audio_paths)}")
            # Check whether each listed file exists; the configured paths are
            # joined onto the project root.
            for files in (backup_videos, audio_paths):
                for file_path in files:
                    full_path = project_root / file_path
                    exists = "[OK]" if full_path.exists() else "[X]"
                    print(f" {exists} {file_path}")
else:
    print(f"[X] 配置文件不存在:{config_file}")
# 3. Check the test files: report the directory this script lives in.
section_rule = "-" * 60
print("\n" + section_rule)
print("测试文件检查")
print(section_rule)
this_script = Path(__file__)
test_files_dir = this_script.parent
print(f"测试目录:{test_files_dir}")
# 4. Check dependencies
import importlib

print("\n" + "-" * 60)
print("依赖检查")
print("-" * 60)
# Maps the importable module name to the pip distribution that provides it.
# The distribution name is what gets printed in the install hint, so it must
# be installable as written (PyTorch is published on PyPI as "torch").
dependencies = {
    'pytest': 'pytest',
    'yaml': 'pyyaml',
    'flask': 'flask',
    'torch': 'torch',
    'librosa': 'librosa',
}
for module, package in dependencies.items():
    try:
        importlib.import_module(module)
    except ImportError:
        print(f"[X] {package} (需要安装pip install {package})")
    except Exception as e:
        # Installed but failing to load (for example a native library that
        # cannot be loaded): report it instead of aborting the diagnosis.
        print(f"[X] {package} (导入失败:{e})")
    else:
        print(f"[OK] {package}")
# 5. Check the actual test input files
print("\n" + "-" * 60)
print("实际测试文件检查")
print("-" * 60)
input_dir = project_root / "input"
if input_dir.is_dir():
    print(f"输入目录:{input_dir}")
    # Scan the directory once and compare lower-cased suffixes. Globbing each
    # spelling separately ("*.AVI" plus "*.avi") lists every file twice on
    # Windows, where pathlib globbing is case-insensitive, and misses
    # mixed-case suffixes such as ".Avi" on case-sensitive systems.
    entries = sorted(p for p in input_dir.iterdir() if p.is_file())
    video_files = [p for p in entries if p.suffix.lower() in {".avi", ".mp4"}]
    audio_files = [p for p in entries if p.suffix.lower() == ".wav"]
    for label, files in (("视频文件", video_files), ("音频文件", audio_files)):
        print(f" {label}:{len(files)}")
        # Only the first five entries are listed to keep the report short.
        for f in files[:5]:
            size_mb = f.stat().st_size / 1024 / 1024
            print(f" - {f.name} ({size_mb:.2f} MB)")
else:
    print(f"[X] 输入目录不存在:{input_dir}")
# 6. Check the ASR model
print("\n" + "-" * 60)
print("ASR 模型检查")
print("-" * 60)
try:
    # Running this file directly puts its own directory, not the project
    # root, on sys.path, and os.chdir() does not change that. Add the root so
    # the import below can be resolved (assumes the `app` package lives
    # directly under the project root -- confirm against the repository).
    if str(project_root) not in sys.path:
        sys.path.insert(0, str(project_root))
    from app.asr.asr_service import ASRService  # imported only to prove it loads
    print("[OK] ASR 服务模块可导入")
    # Check the model cache directory. An empty variable is treated like an
    # unset one; Path("") would otherwise mean the current directory.
    cache_env = os.environ.get("MODELSCOPE_CACHE")
    model_cache = cache_env or "未设置"
    print(f" 模型缓存目录:{model_cache}")
    cache_path = Path(cache_env) if cache_env else None
    if cache_path is not None and cache_path.exists():
        model_files = list(cache_path.glob("*"))
        print(f" 缓存的模型文件:{len(model_files)}")
    else:
        print(" [!] 模型缓存目录不存在(首次运行会下载模型)")
except Exception as e:
    # Diagnostic boundary: any failure while importing the service is
    # reported rather than allowed to stop the remaining checks.
    print(f"[X] ASR 服务导入失败:{e}")
# 7. Check ffmpeg: the binary is looked up under the project's lib directory.
section_rule = "-" * 60
print("\n" + section_rule)
print("FFmpeg 检查")
print(section_rule)
ffmpeg_path = project_root.joinpath("lib", "ffmpeg.exe")
if not ffmpeg_path.exists():
    print(f"[X] ffmpeg 不存在:{ffmpeg_path}")
    print(" 请确保 ffmpeg 在 lib 目录下")
else:
    print(f"[OK] ffmpeg 存在:{ffmpeg_path}")
# 8. Usage suggestions
print("\n" + "=" * 60)
print("运行建议")
print("=" * 60)
# Static guidance text, printed verbatim: example pytest invocations followed
# by notes on what to expect when running the ASR tests.
print("""
运行端到端测试:
pytest test/real/ -v
只运行快速验证测试(不测试 ASR):
pytest test/real/test_real_e2e.py::TestVideoFileValidation -v
pytest test/real/test_real_e2e.py::TestAudioFileValidation -v
pytest test/real/test_real_e2e.py::TestErrorHandling -v
运行单个 ASR 测试(带详细输出):
pytest test/real/test_real_e2e.py::TestASRRecognition::test_recognize_audio_file -v -s
如果测试卡住,按 Ctrl+C 查看堆栈跟踪
注意事项:
1. 首次运行会下载 ASR 模型(可能很大,需要时间)
2. CPU 运行 ASR 推理较慢(1 分钟音频可能需要 2-5 分钟)
3. 确保有足够的磁盘空间(临时文件 + 模型缓存)
4. 如果内存不足,建议使用较小的测试文件
""")
print("=" * 60)
print("诊断完成")
print("=" * 60)