1772 lines
60 KiB
Python
1772 lines
60 KiB
Python
"""
|
||
语音识别器
|
||
负责将语音转换为文本
|
||
"""
|
||
|
||
import time
|
||
import numpy as np
|
||
from typing import Dict, Any, List, Callable
|
||
import threading
|
||
import json
|
||
import re
|
||
|
||
class SpeechRecognizer:
|
||
"""
|
||
语音识别器
|
||
负责将语音转换为文本,支持多种语言和识别引擎
|
||
"""
|
||
|
||
def __init__(self, plugin):
    """
    Create the recognizer and populate its default state.

    Args:
        plugin: Owning speech recognition/synthesis plugin instance. Its
            ``config`` object is queried with ``get()`` for 'language',
            'sample_rate' and 'channels'.
    """
    self.plugin = plugin
    self.enabled = False
    self.initialized = False
    self.is_listening = False
    self.is_processing = False

    # Recognition configuration (defaults apply when the key is absent)
    self.language = plugin.config.get('language', 'zh-CN')
    self.sample_rate = plugin.config.get('sample_rate', 16000)
    self.channels = plugin.config.get('channels', 1)

    # Recognition / voice-activity state
    self.recognition_thread = None
    self.audio_buffer = []
    self.silence_counter = 0
    self.voice_activity_counter = 0
    # NOTE: this instance attribute has the same name as the
    # is_voice_detected() method of this class, so on an initialized
    # instance attribute lookup finds this bool instead of the method.
    self.is_voice_detected = False
    self.voice_start_time = 0.0
    self.last_voice_time = 0.0

    # Recognition parameters
    self.recognition_params = {
        'silence_threshold': 0.005,
        'voice_threshold': 0.01,
        'silence_duration': 1.0,  # seconds
        'min_voice_duration': 0.3,  # seconds
        'max_recording_duration': 30.0,  # seconds
        'confidence_threshold': 0.7,  # confidence threshold
        'enable_punctuation': True,  # enable punctuation
        'enable_profanity_filter': True,  # enable profanity filtering
        'max_alternatives': 3,  # maximum number of alternative results
        'enable_word_timing': True,  # enable per-word timestamps
        'enable_speaker_diarization': False,  # enable speaker diarization
        'min_speaker_count': 1,  # minimum number of speakers
        'max_speaker_count': 6,  # maximum number of speakers
        'enable_contextual_biasing': True,  # enable contextual biasing
        'contextual_phrases': [],  # contextual phrases
        'model_variant': 'default',  # model variant
        'enable_enhanced_models': True,  # enable enhanced models
        'recognition_timeout': 60.0,  # recognition timeout
        'enable_real_time_feedback': True,  # enable real-time feedback
        'profanity_replace_char': '*',  # profanity replacement character
        'enable_audio_preprocessing': True,  # enable audio preprocessing
        'noise_reduction_level': 0.3,  # noise reduction level
    }

    # Statistics
    self.stats = {
        'recognitions_processed': 0,
        'recognition_errors': 0,
        'total_recognition_time': 0.0,
        'average_recognition_time': 0.0,
        'voice_segments_detected': 0,
        'total_voice_duration': 0.0,
        'words_per_minute': 0.0,
        'confidence_average': 0.0,
        'recognition_accuracy': 0.0
    }

    # Callbacks
    self.recognition_callback = None
    self.partial_result_callback = None
    self.error_callback = None
    self.voice_activity_callback = None

    # Recognition result cache
    self.recognition_cache = {}
    self.cache_size = 100

    # Recognition mode
    self.recognition_mode = 'dictation'  # 'command', 'dictation', 'search'

    # Custom lexicon
    self.custom_lexicon = {}  # maps a word to its pronunciation

    # Speaker information
    self.speaker_info = {}

    # Simulated recognition database: phrase -> base confidence, per language
    self.simulated_database = {
        'zh-CN': {
            '你好世界': 0.95,
            '今天天气怎么样': 0.92,
            '我想测试语音识别功能': 0.90,
            '这是一个语音识别测试': 0.88,
            '语音识别插件工作正常': 0.91,
            '请说出您要识别的内容': 0.89,
            '语音识别技术非常有用': 0.87,
            '我可以识别多种语言': 0.85,
            '识别准确率很高': 0.86,
            '支持实时语音识别': 0.84,
            '语音命令控制': 0.93,
            '打开游戏设置': 0.94,
            '保存当前进度': 0.92,
            '退出应用程序': 0.90,
            '增加音量': 0.88,
            '降低音量': 0.87,
            '暂停游戏': 0.91,
            '继续游戏': 0.90,
            '重新开始': 0.89,
            '显示地图': 0.88
        },
        'en-US': {
            'Hello world': 0.95,
            'How is the weather today': 0.92,
            'I want to test speech recognition': 0.90,
            'This is a speech recognition test': 0.88,
            'Speech recognition plugin works fine': 0.91,
            'Please speak what you want to recognize': 0.89,
            'Speech recognition technology is very useful': 0.87,
            'I can recognize multiple languages': 0.85,
            'Recognition accuracy is high': 0.86,
            'Support real-time speech recognition': 0.84,
            'Voice command control': 0.93,
            'Open game settings': 0.94,
            'Save current progress': 0.92,
            'Exit application': 0.90,
            'Increase volume': 0.88,
            'Decrease volume': 0.87,
            'Pause game': 0.91,
            'Resume game': 0.90,
            'Restart': 0.89,
            'Show map': 0.88
        }
    }

    # Technical term vocabulary (term -> spelled-out form)
    self.technical_terms = {
        'GPU': 'G P U',
        'CPU': 'C P U',
        'RAM': 'R A M',
        'API': 'A P I',
        'SDK': 'S D K',
        'FPS': 'F P S',
        'UI': 'U I',
        'UX': 'U X'
    }

    # Profanity filter dictionary (placeholder entries), per language
    self.profanity_words = {
        'zh-CN': ['脏话1', '脏话2', '脏话3'],
        'en-US': ['profanity1', 'profanity2', 'profanity3']
    }

    print("✓ 语音识别器已创建")
|
||
|
||
def initialize(self) -> bool:
|
||
"""
|
||
初始化语音识别器
|
||
|
||
Returns:
|
||
是否初始化成功
|
||
"""
|
||
try:
|
||
# 初始化识别引擎(这里简化处理,实际项目中需要加载相应的识别模型或库)
|
||
print("✓ 语音识别引擎初始化完成")
|
||
|
||
self.initialized = True
|
||
print("✓ 语音识别器初始化完成")
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"✗ 语音识别器初始化失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
return False
|
||
|
||
def enable(self) -> bool:
|
||
"""
|
||
启用语音识别器
|
||
|
||
Returns:
|
||
是否启用成功
|
||
"""
|
||
try:
|
||
if not self.initialized:
|
||
print("✗ 语音识别器未初始化")
|
||
return False
|
||
|
||
self.enabled = True
|
||
print("✓ 语音识别器已启用")
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"✗ 语音识别器启用失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
return False
|
||
|
||
def disable(self):
|
||
"""禁用语音识别器"""
|
||
try:
|
||
self.stop_listening()
|
||
self.enabled = False
|
||
print("✓ 语音识别器已禁用")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 语音识别器禁用失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def finalize(self):
|
||
"""清理语音识别器资源"""
|
||
try:
|
||
self.disable()
|
||
self.initialized = False
|
||
print("✓ 语音识别器资源已清理")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 语音识别器资源清理失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def update(self, dt: float):
    """
    Per-tick update hook; currently does nothing.

    Args:
        dt: Time delta since the previous update.
    """
    # Reserved for periodic tasks.
    return None
|
||
|
||
def start_listening(self) -> bool:
|
||
"""
|
||
开始语音监听
|
||
|
||
Returns:
|
||
是否开始成功
|
||
"""
|
||
try:
|
||
if not self.enabled:
|
||
print("✗ 语音识别器未启用")
|
||
return False
|
||
|
||
if self.is_listening:
|
||
print("⚠ 已经在监听中")
|
||
return True
|
||
|
||
self.is_listening = True
|
||
self.audio_buffer = []
|
||
self.silence_counter = 0
|
||
self.voice_activity_counter = 0
|
||
self.is_voice_detected = False
|
||
self.voice_start_time = 0.0
|
||
self.last_voice_time = 0.0
|
||
|
||
# 启动语音识别线程
|
||
self.recognition_thread = threading.Thread(target=self._recognition_worker, daemon=True)
|
||
self.recognition_thread.start()
|
||
|
||
# 启动录音
|
||
if self.plugin.speech_manager:
|
||
self.plugin.speech_manager.start_recording()
|
||
|
||
print("✓ 开始语音监听")
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"✗ 开始语音监听失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
return False
|
||
|
||
def stop_listening(self):
|
||
"""停止语音监听"""
|
||
try:
|
||
self.is_listening = False
|
||
|
||
if self.recognition_thread and self.recognition_thread.is_alive():
|
||
self.recognition_thread.join(timeout=1.0)
|
||
|
||
# 停止录音
|
||
if self.plugin.speech_manager:
|
||
self.plugin.speech_manager.stop_recording()
|
||
|
||
print("✓ 停止语音监听")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 停止语音监听失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def process_audio(self):
|
||
"""处理音频数据"""
|
||
try:
|
||
if not self.is_listening or not self.enabled:
|
||
return
|
||
|
||
# 从语音管理器获取音频数据
|
||
if self.plugin.speech_manager:
|
||
audio_chunk = self.plugin.speech_manager.get_audio_chunk()
|
||
|
||
# 可选的音频预处理
|
||
if self.recognition_params['enable_audio_preprocessing']:
|
||
audio_chunk = self.plugin.speech_manager.apply_speech_enhancement(audio_chunk)
|
||
|
||
# 检测语音活动
|
||
is_voice = self._detect_voice_activity(audio_chunk)
|
||
|
||
current_time = time.time()
|
||
|
||
if is_voice:
|
||
# 检测到语音
|
||
if not self.is_voice_detected:
|
||
self.is_voice_detected = True
|
||
self.voice_start_time = current_time
|
||
self.voice_activity_counter = 1
|
||
self.silence_counter = 0
|
||
print("✓ 检测到语音活动开始")
|
||
|
||
# 调用语音活动回调
|
||
if self.voice_activity_callback:
|
||
self.voice_activity_callback(True)
|
||
else:
|
||
self.voice_activity_counter += 1
|
||
self.last_voice_time = current_time
|
||
|
||
else:
|
||
# 检测到静音
|
||
if self.is_voice_detected:
|
||
self.silence_counter += 1
|
||
|
||
# 检查是否超过静音时长阈值
|
||
silence_duration = self.silence_counter * len(audio_chunk) / self.sample_rate
|
||
if silence_duration >= self.recognition_params['silence_duration']:
|
||
# 语音活动结束
|
||
voice_duration = current_time - self.voice_start_time
|
||
if voice_duration >= self.recognition_params['min_voice_duration']:
|
||
print(f"✓ 检测到语音活动结束,时长: {voice_duration:.2f}秒")
|
||
self._process_voice_segment()
|
||
else:
|
||
print("⚠ 语音片段太短,忽略")
|
||
|
||
# 重置状态
|
||
self.is_voice_detected = False
|
||
self.voice_activity_counter = 0
|
||
self.silence_counter = 0
|
||
|
||
# 调用语音活动回调
|
||
if self.voice_activity_callback:
|
||
self.voice_activity_callback(False)
|
||
|
||
# 将音频数据添加到缓冲区
|
||
self.audio_buffer.append(audio_chunk)
|
||
|
||
# 检查是否超过最大录音时长
|
||
total_samples = sum(len(chunk) for chunk in self.audio_buffer)
|
||
recording_time = total_samples / self.sample_rate
|
||
|
||
if recording_time >= self.recognition_params['max_recording_duration']:
|
||
print("⚠ 达到最大录音时长,强制处理")
|
||
self._process_voice_segment()
|
||
|
||
except Exception as e:
|
||
print(f"✗ 处理音频数据失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def _recognition_worker(self):
|
||
"""语音识别工作线程"""
|
||
try:
|
||
while self.is_listening and self.enabled:
|
||
self.process_audio()
|
||
time.sleep(0.01) # 10ms延迟
|
||
|
||
except Exception as e:
|
||
print(f"✗ 语音识别工作线程错误: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def _detect_voice_activity(self, audio_chunk: np.ndarray) -> bool:
|
||
"""
|
||
检测语音活动
|
||
|
||
Args:
|
||
audio_chunk: 音频数据块
|
||
|
||
Returns:
|
||
是否检测到语音活动
|
||
"""
|
||
try:
|
||
if len(audio_chunk) == 0:
|
||
return False
|
||
|
||
# 计算音频级别
|
||
audio_level = np.mean(np.abs(audio_chunk))
|
||
|
||
# 判断是否超过语音阈值
|
||
return audio_level > self.recognition_params['voice_threshold']
|
||
except Exception as e:
|
||
print(f"✗ 语音活动检测失败: {e}")
|
||
return False
|
||
|
||
def _process_voice_segment(self):
|
||
"""处理语音片段"""
|
||
try:
|
||
if not self.audio_buffer:
|
||
return
|
||
|
||
# 检查语音时长是否足够
|
||
total_samples = sum(len(chunk) for chunk in self.audio_buffer)
|
||
voice_duration = total_samples / self.sample_rate
|
||
|
||
if voice_duration >= self.recognition_params['min_voice_duration']:
|
||
# 执行语音识别
|
||
recognized_result = self._perform_recognition()
|
||
|
||
if recognized_result and 'text' in recognized_result and recognized_result['text']:
|
||
self.stats['recognitions_processed'] += 1
|
||
self.stats['voice_segments_detected'] += 1
|
||
self.stats['total_voice_duration'] += voice_duration
|
||
|
||
# 更新平均识别时间
|
||
if self.stats['recognitions_processed'] > 0:
|
||
self.stats['average_recognition_time'] = (
|
||
self.stats['total_recognition_time'] / self.stats['recognitions_processed']
|
||
)
|
||
|
||
# 更新平均置信度
|
||
if 'confidence' in recognized_result:
|
||
old_avg = self.stats['confidence_average']
|
||
n = self.stats['recognitions_processed']
|
||
self.stats['confidence_average'] = (old_avg * (n - 1) + recognized_result['confidence']) / n
|
||
|
||
# 调用回调函数
|
||
if self.plugin.speech_recognized_callback:
|
||
self.plugin.speech_recognized_callback(recognized_result['text'])
|
||
elif self.recognition_callback:
|
||
self.recognition_callback(recognized_result['text'])
|
||
|
||
print(f"✓ 识别结果: {recognized_result['text']} (置信度: {recognized_result.get('confidence', 0):.2f})")
|
||
|
||
# 缓存识别结果
|
||
self._cache_recognition_result(recognized_result['text'], recognized_result)
|
||
|
||
# 重置状态
|
||
self.audio_buffer = []
|
||
self.is_voice_detected = False
|
||
self.silence_counter = 0
|
||
self.voice_activity_counter = 0
|
||
self.voice_start_time = 0.0
|
||
self.last_voice_time = 0.0
|
||
|
||
except Exception as e:
|
||
print(f"✗ 处理语音片段失败: {e}")
|
||
self.stats['recognition_errors'] += 1
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def _perform_recognition(self) -> Dict[str, Any]:
|
||
"""
|
||
执行语音识别
|
||
|
||
Returns:
|
||
识别结果字典
|
||
"""
|
||
try:
|
||
recognition_start_time = time.time()
|
||
|
||
# 合并音频缓冲区
|
||
if not self.audio_buffer:
|
||
return {}
|
||
|
||
audio_data = np.concatenate(self.audio_buffer)
|
||
|
||
# 在实际项目中,这里应该调用真正的语音识别引擎
|
||
# 例如:Google Speech-to-Text, Microsoft Speech Services, 或本地模型
|
||
|
||
# 模拟识别过程
|
||
time.sleep(0.1) # 模拟识别延迟
|
||
|
||
# 生成模拟结果
|
||
result = self._generate_simulated_result(audio_data)
|
||
|
||
# 更新统计信息
|
||
recognition_time = time.time() - recognition_start_time
|
||
self.stats['total_recognition_time'] += recognition_time
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
print(f"✗ 语音识别失败: {e}")
|
||
self.stats['recognition_errors'] += 1
|
||
import traceback
|
||
traceback.print_exc()
|
||
return {}
|
||
|
||
def _generate_simulated_result(self, audio_data: np.ndarray) -> Dict[str, Any]:
|
||
"""
|
||
生成模拟识别结果
|
||
|
||
Args:
|
||
audio_data: 音频数据
|
||
|
||
Returns:
|
||
识别结果字典
|
||
"""
|
||
try:
|
||
# 根据语言选择词汇表
|
||
language_vocab = self.simulated_database.get(self.language, self.simulated_database['zh-CN'])
|
||
|
||
# 计算音频特征以选择合适的短语
|
||
audio_energy = np.sum(audio_data ** 2) / len(audio_data)
|
||
audio_duration = len(audio_data) / self.sample_rate
|
||
|
||
# 根据音频特征选择短语
|
||
candidates = []
|
||
for phrase, base_confidence in language_vocab.items():
|
||
# 根据音频时长调整置信度
|
||
duration_match = 1.0 - abs(len(phrase) * 0.1 - audio_duration) / max(len(phrase) * 0.1, audio_duration)
|
||
energy_match = min(1.0, audio_energy * 1000) # 调整能量匹配
|
||
|
||
adjusted_confidence = base_confidence * duration_match * energy_match
|
||
candidates.append((phrase, adjusted_confidence))
|
||
|
||
# 选择置信度最高的结果
|
||
if candidates:
|
||
candidates.sort(key=lambda x: x[1], reverse=True)
|
||
best_phrase, confidence = candidates[0]
|
||
|
||
# 应用后处理
|
||
processed_phrase = self._post_process_result(best_phrase)
|
||
|
||
result = {
|
||
'text': processed_phrase,
|
||
'confidence': confidence,
|
||
'language': self.language,
|
||
'duration': audio_duration,
|
||
'alternatives': []
|
||
}
|
||
|
||
# 添加备选结果
|
||
for phrase, conf in candidates[1:self.recognition_params['max_alternatives']]:
|
||
result['alternatives'].append({
|
||
'text': self._post_process_result(phrase),
|
||
'confidence': conf
|
||
})
|
||
|
||
return result
|
||
else:
|
||
return {
|
||
'text': '',
|
||
'confidence': 0.0,
|
||
'language': self.language,
|
||
'duration': audio_duration
|
||
}
|
||
|
||
except Exception as e:
|
||
print(f"✗ 生成模拟识别结果失败: {e}")
|
||
return {
|
||
'text': '',
|
||
'confidence': 0.0,
|
||
'language': self.language,
|
||
'duration': len(audio_data) / self.sample_rate if audio_data is not None else 0
|
||
}
|
||
|
||
def _post_process_result(self, text: str) -> str:
|
||
"""
|
||
后处理识别结果
|
||
|
||
Args:
|
||
text: 原始识别文本
|
||
|
||
Returns:
|
||
处理后的文本
|
||
"""
|
||
try:
|
||
processed_text = text
|
||
|
||
# 应用自定义词汇表
|
||
for word, pronunciation in self.custom_lexicon.items():
|
||
processed_text = processed_text.replace(word, word)
|
||
|
||
# 应用专业术语
|
||
for term, expansion in self.technical_terms.items():
|
||
processed_text = processed_text.replace(term, term)
|
||
|
||
# 应用脏话过滤
|
||
if self.recognition_params['enable_profanity_filter']:
|
||
profanity_list = self.profanity_words.get(self.language, [])
|
||
replace_char = self.recognition_params['profanity_replace_char']
|
||
for profanity in profanity_list:
|
||
processed_text = processed_text.replace(profanity, replace_char * len(profanity))
|
||
|
||
# 应用上下文偏向短语
|
||
for phrase in self.recognition_params['contextual_phrases']:
|
||
# 这里可以实现更复杂的上下文匹配逻辑
|
||
pass
|
||
|
||
return processed_text
|
||
|
||
except Exception as e:
|
||
print(f"✗ 后处理识别结果失败: {e}")
|
||
return text
|
||
|
||
def _cache_recognition_result(self, text: str, result: Dict[str, Any]):
|
||
"""
|
||
缓存识别结果
|
||
|
||
Args:
|
||
text: 识别文本
|
||
result: 识别结果
|
||
"""
|
||
try:
|
||
# 简单的LRU缓存实现
|
||
if len(self.recognition_cache) >= self.cache_size:
|
||
# 移除最旧的条目
|
||
oldest_key = next(iter(self.recognition_cache))
|
||
del self.recognition_cache[oldest_key]
|
||
|
||
self.recognition_cache[text] = {
|
||
'result': result,
|
||
'timestamp': time.time()
|
||
}
|
||
except Exception as e:
|
||
print(f"✗ 缓存识别结果失败: {e}")
|
||
|
||
def set_language(self, language: str):
|
||
"""
|
||
设置识别语言
|
||
|
||
Args:
|
||
language: 语言代码 (如 'zh-CN', 'en-US')
|
||
"""
|
||
self.language = language
|
||
print(f"✓ 识别语言设置为: {language}")
|
||
|
||
def set_recognition_callback(self, callback: Callable):
|
||
"""
|
||
设置识别回调函数
|
||
|
||
Args:
|
||
callback: 回调函数
|
||
"""
|
||
self.recognition_callback = callback
|
||
|
||
def set_partial_result_callback(self, callback: Callable):
|
||
"""
|
||
设置部分结果回调函数
|
||
|
||
Args:
|
||
callback: 回调函数
|
||
"""
|
||
self.partial_result_callback = callback
|
||
|
||
def set_error_callback(self, callback: Callable):
|
||
"""
|
||
设置错误回调函数
|
||
|
||
Args:
|
||
callback: 回调函数
|
||
"""
|
||
self.error_callback = callback
|
||
|
||
def set_voice_activity_callback(self, callback: Callable):
|
||
"""
|
||
设置语音活动回调函数
|
||
|
||
Args:
|
||
callback: 回调函数
|
||
"""
|
||
self.voice_activity_callback = callback
|
||
|
||
def get_stats(self) -> Dict[str, Any]:
|
||
"""
|
||
获取统计信息
|
||
|
||
Returns:
|
||
统计信息字典
|
||
"""
|
||
# 计算每分钟单词数
|
||
if self.stats['total_voice_duration'] > 0:
|
||
self.stats['words_per_minute'] = (
|
||
self.stats['recognitions_processed'] * 10 / # 假设每次识别约10个单词
|
||
(self.stats['total_voice_duration'] / 60.0)
|
||
)
|
||
|
||
return self.stats.copy()
|
||
|
||
def get_available_languages(self) -> List[str]:
    """
    Return the (simulated) list of supported language codes.

    Returns:
        A new list of language code strings.
    """
    supported = [
        'zh-CN',  # Simplified Chinese
        'zh-TW',  # Traditional Chinese
        'en-US',  # English (United States)
        'en-GB',  # English (United Kingdom)
        'ja-JP',  # Japanese
        'ko-KR',  # Korean
        'fr-FR',  # French
        'de-DE',  # German
        'es-ES',  # Spanish
        'ru-RU',  # Russian
        'ar-SA',  # Arabic
        'pt-BR',  # Portuguese (Brazil)
        'it-IT',  # Italian
        'nl-NL',  # Dutch
        'pl-PL',  # Polish
        'th-TH',  # Thai
        'vi-VN',  # Vietnamese
        'tr-TR',  # Turkish
        'cs-CZ',  # Czech
        'uk-UA',  # Ukrainian
    ]
    return supported
|
||
|
||
def set_silence_threshold(self, threshold: float):
|
||
"""
|
||
设置静音阈值
|
||
|
||
Args:
|
||
threshold: 静音阈值
|
||
"""
|
||
self.recognition_params['silence_threshold'] = max(0.0, min(1.0, threshold))
|
||
print(f"✓ 静音阈值设置为: {threshold}")
|
||
|
||
def set_voice_threshold(self, threshold: float):
|
||
"""
|
||
设置语音阈值
|
||
|
||
Args:
|
||
threshold: 语音阈值
|
||
"""
|
||
self.recognition_params['voice_threshold'] = max(0.0, min(1.0, threshold))
|
||
print(f"✓ 语音阈值设置为: {threshold}")
|
||
|
||
def set_silence_duration(self, duration: float):
|
||
"""
|
||
设置静音检测时长
|
||
|
||
Args:
|
||
duration: 静音检测时长(秒)
|
||
"""
|
||
self.recognition_params['silence_duration'] = max(0.1, duration)
|
||
print(f"✓ 静音检测时长设置为: {duration} 秒")
|
||
|
||
def set_min_voice_duration(self, duration: float):
|
||
"""
|
||
设置最短语音时长
|
||
|
||
Args:
|
||
duration: 最短语音时长(秒)
|
||
"""
|
||
self.recognition_params['min_voice_duration'] = max(0.1, duration)
|
||
print(f"✓ 最短语音时长设置为: {duration} 秒")
|
||
|
||
def set_max_recording_duration(self, duration: float):
|
||
"""
|
||
设置最大录音时长
|
||
|
||
Args:
|
||
duration: 最大录音时长(秒)
|
||
"""
|
||
self.recognition_params['max_recording_duration'] = max(1.0, duration)
|
||
print(f"✓ 最大录音时长设置为: {duration} 秒")
|
||
|
||
def is_voice_detected(self) -> bool:
    """
    Report whether voice is currently detected.

    NOTE(review): __init__ assigns an instance attribute with this same
    name (``self.is_voice_detected = False``). On an instance created
    through __init__ that attribute is found first, so calling
    ``recognizer.is_voice_detected()`` would call a bool and fail; callers
    apparently must read the attribute directly. Renaming either the method
    or the attribute would be needed to make this method usable.

    Returns:
        The value of ``self.is_voice_detected``.
    """
    return self.is_voice_detected
|
||
|
||
def get_current_buffer_duration(self) -> float:
|
||
"""
|
||
获取当前缓冲区音频时长
|
||
|
||
Returns:
|
||
音频时长(秒)
|
||
"""
|
||
try:
|
||
total_samples = sum(len(chunk) for chunk in self.audio_buffer)
|
||
return total_samples / self.sample_rate
|
||
except Exception as e:
|
||
print(f"✗ 计算缓冲区时长失败: {e}")
|
||
return 0.0
|
||
|
||
def clear_audio_buffer(self):
|
||
"""清空音频缓冲区"""
|
||
self.audio_buffer = []
|
||
self.silence_counter = 0
|
||
self.voice_activity_counter = 0
|
||
self.is_voice_detected = False
|
||
self.voice_start_time = 0.0
|
||
self.last_voice_time = 0.0
|
||
print("✓ 音频缓冲区已清空")
|
||
|
||
def add_custom_vocabulary(self, words: List[str]):
|
||
"""
|
||
添加自定义词汇表
|
||
|
||
Args:
|
||
words: 词汇列表
|
||
"""
|
||
try:
|
||
for word in words:
|
||
# 简单处理,实际应用中可能需要提供发音信息
|
||
self.custom_lexicon[word] = word.upper()
|
||
print(f"✓ 添加自定义词汇: {words}")
|
||
except Exception as e:
|
||
print(f"✗ 添加自定义词汇失败: {e}")
|
||
|
||
def set_recognition_mode(self, mode: str):
|
||
"""
|
||
设置识别模式
|
||
|
||
Args:
|
||
mode: 识别模式 ('command', 'dictation', 'search')
|
||
"""
|
||
valid_modes = ['command', 'dictation', 'search']
|
||
if mode in valid_modes:
|
||
self.recognition_mode = mode
|
||
print(f"✓ 识别模式设置为: {mode}")
|
||
else:
|
||
print(f"✗ 无效的识别模式: {mode}")
|
||
|
||
def enable_punctuation(self, enable: bool = True):
|
||
"""
|
||
启用/禁用标点符号识别
|
||
|
||
Args:
|
||
enable: 是否启用标点符号识别
|
||
"""
|
||
self.recognition_params['enable_punctuation'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 标点符号识别已{state}")
|
||
|
||
def enable_profanity_filter(self, enable: bool = True):
|
||
"""
|
||
启用/禁用脏话过滤
|
||
|
||
Args:
|
||
enable: 是否启用脏话过滤
|
||
"""
|
||
self.recognition_params['enable_profanity_filter'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 脏话过滤已{state}")
|
||
|
||
def set_alternative_count(self, count: int):
|
||
"""
|
||
设置备选结果数量
|
||
|
||
Args:
|
||
count: 备选结果数量
|
||
"""
|
||
count = max(0, min(30, count)) # 限制在0-30之间
|
||
self.recognition_params['max_alternatives'] = count
|
||
print(f"✓ 备选结果数量设置为: {count}")
|
||
|
||
def enable_word_timing(self, enable: bool = True):
|
||
"""
|
||
启用/禁用单词时间戳
|
||
|
||
Args:
|
||
enable: 是否启用单词时间戳
|
||
"""
|
||
self.recognition_params['enable_word_timing'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 单词时间戳已{state}")
|
||
|
||
def set_max_alternatives(self, max_alternatives: int):
|
||
"""
|
||
设置最大备选结果数
|
||
|
||
Args:
|
||
max_alternatives: 最大备选结果数
|
||
"""
|
||
self.recognition_params['max_alternatives'] = max(1, max_alternatives)
|
||
print(f"✓ 最大备选结果数设置为: {max_alternatives}")
|
||
|
||
def enable_speaker_diarization(self, enable: bool = True):
|
||
"""
|
||
启用/禁用说话人分离
|
||
|
||
Args:
|
||
enable: 是否启用说话人分离
|
||
"""
|
||
self.recognition_params['enable_speaker_diarization'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 说话人分离已{state}")
|
||
|
||
def set_speaker_count(self, min_speakers: int, max_speakers: int):
|
||
"""
|
||
设置说话人数量范围
|
||
|
||
Args:
|
||
min_speakers: 最少说话人数
|
||
max_speakers: 最多说话人数
|
||
"""
|
||
min_speakers = max(1, min_speakers)
|
||
max_speakers = max(min_speakers, max_speakers)
|
||
self.recognition_params['min_speaker_count'] = min_speakers
|
||
self.recognition_params['max_speaker_count'] = max_speakers
|
||
print(f"✓ 说话人数量范围设置为: {min_speakers}-{max_speakers}")
|
||
|
||
def enable_audio_preprocessing(self, enable: bool = True):
|
||
"""
|
||
启用/禁用音频预处理
|
||
|
||
Args:
|
||
enable: 是否启用音频预处理
|
||
"""
|
||
self.recognition_params['enable_audio_preprocessing'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 音频预处理已{state}")
|
||
|
||
def set_noise_reduction_level(self, level: float):
|
||
"""
|
||
设置噪声消除级别
|
||
|
||
Args:
|
||
level: 噪声消除级别 (0.0-1.0)
|
||
"""
|
||
level = max(0.0, min(1.0, level))
|
||
self.recognition_params['noise_reduction_level'] = level
|
||
print(f"✓ 噪声消除级别设置为: {level}")
|
||
|
||
def enable_real_time_feedback(self, enable: bool = True):
|
||
"""
|
||
启用/禁用实时反馈
|
||
|
||
Args:
|
||
enable: 是否启用实时反馈
|
||
"""
|
||
self.recognition_params['enable_real_time_feedback'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 实时反馈已{state}")
|
||
|
||
def set_confidence_threshold(self, threshold: float):
|
||
"""
|
||
设置置信度阈值
|
||
|
||
Args:
|
||
threshold: 置信度阈值 (0.0-1.0)
|
||
"""
|
||
threshold = max(0.0, min(1.0, threshold))
|
||
self.recognition_params['confidence_threshold'] = threshold
|
||
print(f"✓ 置信度阈值设置为: {threshold}")
|
||
|
||
def enable_contextual_biasing(self, enable: bool = True):
|
||
"""
|
||
启用/禁用上下文偏向
|
||
|
||
Args:
|
||
enable: 是否启用上下文偏向
|
||
"""
|
||
self.recognition_params['enable_contextual_biasing'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 上下文偏向已{state}")
|
||
|
||
def add_contextual_phrases(self, phrases: List[str]):
|
||
"""
|
||
添加上下文短语
|
||
|
||
Args:
|
||
phrases: 短语列表
|
||
"""
|
||
try:
|
||
self.recognition_params['contextual_phrases'].extend(phrases)
|
||
print(f"✓ 添加上下文短语: {phrases}")
|
||
except Exception as e:
|
||
print(f"✗ 添加上下文短语失败: {e}")
|
||
|
||
def set_model_variant(self, variant: str):
|
||
"""
|
||
设置模型变体
|
||
|
||
Args:
|
||
variant: 模型变体 ('default', 'command_and_search', 'phone_call', 'video')
|
||
"""
|
||
valid_variants = ['default', 'command_and_search', 'phone_call', 'video']
|
||
if variant in valid_variants:
|
||
self.recognition_params['model_variant'] = variant
|
||
print(f"✓ 模型变体设置为: {variant}")
|
||
else:
|
||
print(f"✗ 无效的模型变体: {variant}")
|
||
|
||
def enable_enhanced_models(self, enable: bool = True):
|
||
"""
|
||
启用/禁用增强模型
|
||
|
||
Args:
|
||
enable: 是否启用增强模型
|
||
"""
|
||
self.recognition_params['enable_enhanced_models'] = enable
|
||
state = "启用" if enable else "禁用"
|
||
print(f"✓ 增强模型已{state}")
|
||
|
||
def set_recognition_timeout(self, timeout: float):
|
||
"""
|
||
设置识别超时时间
|
||
|
||
Args:
|
||
timeout: 超时时间(秒)
|
||
"""
|
||
timeout = max(0.1, timeout)
|
||
self.recognition_params['recognition_timeout'] = timeout
|
||
print(f"✓ 识别超时时间设置为: {timeout} 秒")
|
||
|
||
def set_profanity_replace_char(self, char: str):
|
||
"""
|
||
设置脏话替换字符
|
||
|
||
Args:
|
||
char: 替换字符
|
||
"""
|
||
self.recognition_params['profanity_replace_char'] = char
|
||
print(f"✓ 脏话替换字符设置为: {char}")
|
||
|
||
def get_cached_results(self) -> Dict[str, Any]:
|
||
"""
|
||
获取缓存的识别结果
|
||
|
||
Returns:
|
||
缓存结果字典
|
||
"""
|
||
return self.recognition_cache.copy()
|
||
|
||
def clear_cache(self):
|
||
"""清空识别结果缓存"""
|
||
self.recognition_cache.clear()
|
||
print("✓ 识别结果缓存已清空")
|
||
|
||
def set_cache_size(self, size: int):
|
||
"""
|
||
设置缓存大小
|
||
|
||
Args:
|
||
size: 缓存大小
|
||
"""
|
||
self.cache_size = max(1, size)
|
||
print(f"✓ 缓存大小设置为: {size}")
|
||
|
||
def export_stats(self, filename: str) -> bool:
|
||
"""
|
||
导出统计信息到文件
|
||
|
||
Args:
|
||
filename: 文件名
|
||
|
||
Returns:
|
||
是否导出成功
|
||
"""
|
||
try:
|
||
stats_data = {
|
||
'timestamp': time.time(),
|
||
'stats': self.get_stats(),
|
||
'params': self.recognition_params,
|
||
'language': self.language,
|
||
'mode': self.recognition_mode
|
||
}
|
||
|
||
with open(filename, 'w', encoding='utf-8') as f:
|
||
json.dump(stats_data, f, ensure_ascii=False, indent=2)
|
||
|
||
print(f"✓ 统计信息已导出到: {filename}")
|
||
return True
|
||
except Exception as e:
|
||
print(f"✗ 导出统计信息失败: {e}")
|
||
return False
|
||
|
||
def import_custom_lexicon(self, filename: str) -> bool:
|
||
"""
|
||
从文件导入自定义词汇表
|
||
|
||
Args:
|
||
filename: 文件名
|
||
|
||
Returns:
|
||
是否导入成功
|
||
"""
|
||
try:
|
||
with open(filename, 'r', encoding='utf-8') as f:
|
||
lexicon_data = json.load(f)
|
||
self.custom_lexicon.update(lexicon_data)
|
||
|
||
print(f"✓ 自定义词汇表已从 {filename} 导入")
|
||
return True
|
||
except Exception as e:
|
||
print(f"✗ 导入自定义词汇表失败: {e}")
|
||
return False
|
||
|
||
def export_custom_lexicon(self, filename: str) -> bool:
|
||
"""
|
||
导出自定义词汇表到文件
|
||
|
||
Args:
|
||
filename: 文件名
|
||
|
||
Returns:
|
||
是否导出成功
|
||
"""
|
||
try:
|
||
with open(filename, 'w', encoding='utf-8') as f:
|
||
json.dump(self.custom_lexicon, f, ensure_ascii=False, indent=2)
|
||
|
||
print(f"✓ 自定义词汇表已导出到: {filename}")
|
||
return True
|
||
except Exception as e:
|
||
print(f"✗ 导出自定义词汇表失败: {e}")
|
||
return False
|
||
|
||
def reset_stats(self):
|
||
"""重置统计信息"""
|
||
self.stats = {
|
||
'recognitions_processed': 0,
|
||
'recognition_errors': 0,
|
||
'total_recognition_time': 0.0,
|
||
'average_recognition_time': 0.0,
|
||
'voice_segments_detected': 0,
|
||
'total_voice_duration': 0.0,
|
||
'words_per_minute': 0.0,
|
||
'confidence_average': 0.0,
|
||
'recognition_accuracy': 0.0
|
||
}
|
||
print("✓ 统计信息已重置")
|
||
|
||
def set_speaker_info(self, speaker_id: str, info: Dict[str, Any]):
|
||
"""
|
||
设置说话人信息
|
||
|
||
Args:
|
||
speaker_id: 说话人ID
|
||
info: 说话人信息
|
||
"""
|
||
self.speaker_info[speaker_id] = info
|
||
print(f"✓ 说话人 {speaker_id} 信息已设置")
|
||
|
||
def get_speaker_info(self, speaker_id: str) -> Dict[str, Any]:
|
||
"""
|
||
获取说话人信息
|
||
|
||
Args:
|
||
speaker_id: 说话人ID
|
||
|
||
Returns:
|
||
说话人信息
|
||
"""
|
||
return self.speaker_info.get(speaker_id, {})
|
||
|
||
def remove_speaker_info(self, speaker_id: str):
|
||
"""
|
||
移除说话人信息
|
||
|
||
Args:
|
||
speaker_id: 说话人ID
|
||
"""
|
||
if speaker_id in self.speaker_info:
|
||
del self.speaker_info[speaker_id]
|
||
print(f"✓ 说话人 {speaker_id} 信息已移除")
|
||
|
||
def enable_continuous_recognition(self, enable: bool = True):
    """
    Log the requested continuous-recognition state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    # Continuous recognition is the default behaviour; this only reports the setting.
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 连续识别已{state}")
|
||
|
||
def set_adaptive_sensitivity(self, enable: bool = True):
    """
    Log the requested adaptive-sensitivity state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    # A real implementation would adapt thresholds to ambient noise.
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 自适应灵敏度已{state}")
|
||
|
||
def enable_dialect_support(self, enable: bool = True):
    """
    Log the requested dialect-support state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 方言支持已{state}")
|
||
|
||
def set_dialect_preference(self, dialects: List[str]):
    """
    Log the requested dialect preference; nothing is stored.

    Args:
        dialects: Dialect list echoed in the log message.
    """
    message = f"✓ 方言偏好设置为: {dialects}"
    print(message)
|
||
|
||
def enable_accent_adaptation(self, enable: bool = True):
    """
    Log the requested accent-adaptation state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 口音适应已{state}")
|
||
|
||
def add_accent_sample(self, audio_data: np.ndarray, accent_type: str):
    """
    Log that an accent sample was added; the audio itself is not used or stored.

    Args:
        audio_data: Audio samples (currently unused).
        accent_type: Accent label echoed in the log message.
    """
    message = f"✓ 已添加 {accent_type} 口音样本"
    print(message)
|
||
|
||
def enable_domain_optimization(self, enable: bool = True):
    """
    Log the requested domain-optimization state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 领域优化已{state}")
|
||
|
||
def set_domain_context(self, domain: str):
    """
    Validate and log a domain context; nothing is stored.

    Args:
        domain: 'medical', 'legal', 'technical' or 'general'; any other
            value is reported as invalid.
    """
    if domain not in ('medical', 'legal', 'technical', 'general'):
        print(f"✗ 无效的领域类型: {domain}")
        return

    print(f"✓ 领域上下文设置为: {domain}")
|
||
|
||
def enable_low_latency_mode(self, enable: bool = True):
    """
    Log the requested low-latency-mode state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 低延迟模式已{state}")
|
||
|
||
def set_latency_preference(self, preference: str):
    """
    Validate and log a latency preference; nothing is stored.

    Args:
        preference: 'accuracy', 'speed' or 'balanced'; any other value is
            reported as invalid.
    """
    if preference not in ('accuracy', 'speed', 'balanced'):
        print(f"✗ 无效的延迟偏好: {preference}")
        return

    print(f"✓ 延迟偏好设置为: {preference}")
|
||
|
||
def enable_offline_recognition(self, enable: bool = True):
    """
    Log the requested offline-recognition state; nothing is stored.

    Args:
        enable: Selects which state is reported in the log message.
    """
    if enable:
        state = "启用"
    else:
        state = "禁用"
    print(f"✓ 离线识别已{state}")
|
||
|
||
def update_offline_model(self, model_path: str):
    """
    Log the offline model path; no model is loaded or stored.

    Args:
        model_path: Path echoed in the log message.
    """
    message = f"✓ 离线识别模型更新为: {model_path}"
    print(message)
|
||
|
||
def set_recognition_sensitivity(self, sensitivity: float):
|
||
"""
|
||
设置识别灵敏度
|
||
|
||
Args:
|
||
sensitivity: 灵敏度 (0.0-1.0)
|
||
"""
|
||
sensitivity = max(0.0, min(1.0, sensitivity))
|
||
# 调整语音和静音阈值
|
||
self.recognition_params['voice_threshold'] = 0.005 + sensitivity * 0.02
|
||
self.recognition_params['silence_threshold'] = 0.001 + sensitivity * 0.01
|
||
print(f"✓ 识别灵敏度设置为: {sensitivity}")
|
||
|
||
def enable_auto_punctuation(self, enable: bool = True):
    """
    Turn automatic punctuation on or off.

    Stores ``enable`` under ``recognition_params['enable_punctuation']``
    and prints a confirmation.

    Args:
        enable: Value to store; its truthiness selects the message.
    """
    self.recognition_params['enable_punctuation'] = enable
    print(f"✓ 自动标点符号已{'启用' if enable else '禁用'}")
|
||
|
||
def set_punctuation_style(self, style: str):
    """
    Validate a punctuation style and print whether it was accepted.

    Nothing is stored; the method only reports the outcome.

    Args:
        style: One of 'casual', 'formal' or 'technical'.
    """
    allowed = ('casual', 'formal', 'technical')
    if style not in allowed:
        print(f"✗ 无效的标点符号风格: {style}")
        return
    print(f"✓ 标点符号风格设置为: {style}")
|
||
|
||
def enable_emotion_detection(self, enable: bool = True):
    """
    Announce that emotion detection has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    if enable:
        label = "启用"
    else:
        label = "禁用"
    print(f"✓ 情感检测已{label}")
|
||
|
||
def get_emotion_analysis(self, text: str) -> Dict[str, float]:
    """
    Return a simulated emotion analysis.

    The scores are hard-coded placeholders; ``text`` is not inspected.

    Args:
        text: Text to analyse (currently unused).

    Returns:
        A new dict with 'positive', 'negative', 'neutral' and
        'confidence' scores.
    """
    # Placeholder scores, identical for every input.
    scores = dict(
        positive=0.6,
        negative=0.2,
        neutral=0.2,
        confidence=0.8,
    )
    return scores
|
||
|
||
def enable_intent_recognition(self, enable: bool = True):
    """
    Announce that intent recognition has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    print(f"✓ 意图识别已{'启用' if enable else '禁用'}")
|
||
|
||
def set_intent_context(self, context: str):
    """
    Print a confirmation that the intent-recognition context was set.

    The context is neither validated nor stored by this method.

    Args:
        context: Context type, echoed in the message.
    """
    message = f"✓ 意图识别上下文设置为: {context}"
    print(message)
|
||
|
||
def recognize_with_context(self, audio_data: np.ndarray, context: str = None) -> Dict[str, Any]:
    """
    Recognize ``audio_data`` and tag the result with a context.

    ``_perform_recognition()`` takes no arguments, so the audio is handed
    over the same way ``batch_recognize`` does it: ``self.audio_buffer``
    is temporarily replaced by ``[audio_data]`` and restored afterwards,
    even when recognition fails. Previously ``audio_data`` was ignored.
    NOTE(review): this presumes ``_perform_recognition`` reads
    ``self.audio_buffer`` — confirm against its implementation.

    Args:
        audio_data: Audio to recognize. When ``None`` the current
            ``self.audio_buffer`` is used unchanged.
        context: Optional context; stored under ``'context'`` in the
            result when truthy.

    Returns:
        The recognition result, or ``{}`` if any exception was raised
        (the error is printed, not propagated).
    """
    use_given_audio = audio_data is not None
    saved_buffer = self.audio_buffer
    try:
        if use_given_audio:
            self.audio_buffer = [audio_data]

        result = self._perform_recognition()

        # Attach the context only when one was supplied.
        if context:
            result['context'] = context

        return result
    except Exception as e:
        print(f"✗ 带上下文的语音识别失败: {e}")
        return {}
    finally:
        # Always give the live buffer back, on success and on failure.
        if use_given_audio:
            self.audio_buffer = saved_buffer
|
||
|
||
def batch_recognize(self, audio_segments: List[np.ndarray]) -> List[Dict[str, Any]]:
    """
    Recognize a list of audio segments one after another.

    For each segment ``self.audio_buffer`` is temporarily replaced by
    ``[segment]`` while ``_perform_recognition()`` runs. The original
    buffer is now restored in a ``finally`` clause, so a failing
    recognition no longer leaves the recognizer pointing at a batch
    segment instead of its live buffer.

    Args:
        audio_segments: Audio segments to recognize, in order.

    Returns:
        One result per segment, or ``[]`` if any exception was raised
        (the error is printed and partial results are discarded).
    """
    try:
        results = []
        total = len(audio_segments)
        for position, segment in enumerate(audio_segments, start=1):
            print(f"✓ 处理音频片段 {position}/{total}")

            # Temporarily swap in the segment as the only buffered audio.
            original_buffer = self.audio_buffer
            self.audio_buffer = [segment]
            try:
                results.append(self._perform_recognition())
            finally:
                # Restore even if recognition raised.
                self.audio_buffer = original_buffer

        return results
    except Exception as e:
        print(f"✗ 批量语音识别失败: {e}")
        return []
|
||
|
||
def recognize_with_feedback(self, audio_data: np.ndarray,
                            feedback_callback: Callable[[str], None] = None) -> Dict[str, Any]:
    """
    Recognize ``audio_data`` while reporting progress through a callback.

    ``_perform_recognition()`` takes no arguments, so
    ``self.audio_buffer`` is temporarily replaced by ``[audio_data]`` and
    restored afterwards, even on failure. Previously ``audio_data`` was
    ignored.
    NOTE(review): this presumes ``_perform_recognition`` reads
    ``self.audio_buffer`` — confirm against its implementation.

    Args:
        audio_data: Audio to recognize. When ``None`` the current
            ``self.audio_buffer`` is used unchanged.
        feedback_callback: Optional callable receiving status strings.
            The four simulated progress messages are sent only when
            ``recognition_params['enable_real_time_feedback']`` is
            truthy; the completion message is sent whenever a callback
            is given.

    Returns:
        The recognition result, or ``{}`` if any exception was raised
        (the error is printed, not propagated).
    """
    use_given_audio = audio_data is not None
    saved_buffer = self.audio_buffer
    try:
        if use_given_audio:
            self.audio_buffer = [audio_data]

        # Simulated real-time feedback: a 0.05 s pause separates
        # consecutive progress messages.
        if feedback_callback and self.recognition_params['enable_real_time_feedback']:
            stages = (
                "开始识别...",
                "分析音频特征...",
                "匹配语言模型...",
                "生成识别结果...",
            )
            for index, stage in enumerate(stages):
                if index:
                    time.sleep(0.05)
                feedback_callback(stage)

        result = self._perform_recognition()

        if feedback_callback:
            feedback_callback(f"识别完成: {result.get('text', '')}")

        return result
    except Exception as e:
        print(f"✗ 带反馈的语音识别失败: {e}")
        return {}
    finally:
        # Always give the live buffer back, on success and on failure.
        if use_given_audio:
            self.audio_buffer = saved_buffer
|
||
|
||
def set_recognition_profile(self, profile: str):
    """
    Apply one of the predefined recognition profiles.

    A valid profile overwrites ``enable_enhanced_models``,
    ``max_alternatives`` and ``enable_word_timing`` in
    ``self.recognition_params`` and prints a confirmation. An unknown
    profile leaves the parameters untouched and prints an error.

    Args:
        profile: One of 'fast', 'accurate' or 'balanced'.
    """
    allowed = ('fast', 'accurate', 'balanced')
    if profile not in allowed:
        print(f"✗ 无效的配置文件: {profile}")
        return

    # (enable_enhanced_models, max_alternatives, enable_word_timing)
    presets = {
        'fast': (False, 1, False),
        'accurate': (True, 5, True),
        'balanced': (True, 3, True),
    }
    enhanced, alternatives, word_timing = presets[profile]

    params = self.recognition_params
    params['enable_enhanced_models'] = enhanced
    params['max_alternatives'] = alternatives
    params['enable_word_timing'] = word_timing

    print(f"✓ 识别配置文件设置为: {profile}")
|
||
|
||
def enable_multilingual_recognition(self, enable: bool = True):
    """
    Announce that multilingual recognition has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    if enable:
        label = "启用"
    else:
        label = "禁用"
    print(f"✓ 多语言识别已{label}")
|
||
|
||
def set_multilingual_languages(self, languages: List[str]):
    """
    Print a confirmation listing the multilingual recognition languages.

    The list is neither validated nor stored by this method.

    Args:
        languages: Language codes, echoed in the message.
    """
    message = f"✓ 多语言识别语言设置为: {languages}"
    print(message)
|
||
|
||
def recognize_multilingual(self, audio_data: np.ndarray) -> Dict[str, Any]:
    """
    Recognize ``audio_data`` and mark the result as multilingual.

    ``_perform_recognition()`` takes no arguments, so
    ``self.audio_buffer`` is temporarily replaced by ``[audio_data]`` and
    restored afterwards, even on failure. Previously ``audio_data`` was
    ignored.
    NOTE(review): this presumes ``_perform_recognition`` reads
    ``self.audio_buffer`` — confirm against its implementation.

    Args:
        audio_data: Audio to recognize. When ``None`` the current
            ``self.audio_buffer`` is used unchanged.

    Returns:
        The recognition result with ``'multilingual'`` set to ``True``
        and ``'detected_languages'`` set to ``[self.language]`` (no real
        language detection happens here), or ``{}`` if any exception was
        raised (the error is printed, not propagated).
    """
    use_given_audio = audio_data is not None
    saved_buffer = self.audio_buffer
    try:
        if use_given_audio:
            self.audio_buffer = [audio_data]

        result = self._perform_recognition()

        # Annotate the result with multilingual metadata.
        result['multilingual'] = True
        result['detected_languages'] = [self.language]

        return result
    except Exception as e:
        print(f"✗ 多语言语音识别失败: {e}")
        return {}
    finally:
        # Always give the live buffer back, on success and on failure.
        if use_given_audio:
            self.audio_buffer = saved_buffer
|
||
|
||
def enable_voice_profile_adaptation(self, enable: bool = True):
    """
    Announce that voice-profile adaptation has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    print(f"✓ 语音配置文件适应已{'启用' if enable else '禁用'}")
|
||
|
||
def create_voice_profile(self, profile_name: str, audio_samples: List[np.ndarray]) -> bool:
    """
    Report the creation of a voice profile.

    Only a confirmation is printed; ``audio_samples`` is not processed
    and nothing is stored.

    Args:
        profile_name: Name of the profile, echoed in the message.
        audio_samples: Audio samples for the profile (currently unused).

    Returns:
        ``True`` when the confirmation was printed, ``False`` if an
        exception occurred while doing so.
    """
    try:
        print(f"✓ 语音配置文件 '{profile_name}' 已创建")
    except Exception as e:
        print(f"✗ 创建语音配置文件失败: {e}")
        return False
    else:
        return True
|
||
|
||
def set_active_voice_profile(self, profile_name: str):
    """
    Print a confirmation that the active voice profile was changed.

    The name is neither validated nor stored by this method.

    Args:
        profile_name: Name of the profile, echoed in the message.
    """
    message = f"✓ 活动语音配置文件设置为: {profile_name}"
    print(message)
|
||
|
||
def enable_custom_acoustic_model(self, enable: bool = True):
    """
    Announce that the custom acoustic model has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    if enable:
        label = "启用"
    else:
        label = "禁用"
    print(f"✓ 自定义声学模型已{label}")
|
||
|
||
def load_custom_acoustic_model(self, model_path: str):
    """
    Print a confirmation that a custom acoustic model was loaded.

    No file is opened; the path is only echoed.

    Args:
        model_path: Path of the model, echoed in the message.
    """
    message = f"✓ 自定义声学模型已加载: {model_path}"
    print(message)
|
||
|
||
def enable_custom_language_model(self, enable: bool = True):
    """
    Announce that the custom language model has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    print(f"✓ 自定义语言模型已{'启用' if enable else '禁用'}")
|
||
|
||
def load_custom_language_model(self, model_path: str):
    """
    Print a confirmation that a custom language model was loaded.

    No file is opened; the path is only echoed.

    Args:
        model_path: Path of the model, echoed in the message.
    """
    message = f"✓ 自定义语言模型已加载: {model_path}"
    print(message)
|
||
|
||
def set_recognition_priority(self, priority: str):
    """
    Validate a recognition priority and print whether it was accepted.

    Nothing is stored; the method only reports the outcome.

    Args:
        priority: One of 'realtime', 'quality' or 'balanced'.
    """
    allowed = ('realtime', 'quality', 'balanced')
    if priority in allowed:
        message = f"✓ 识别优先级设置为: {priority}"
    else:
        message = f"✗ 无效的优先级: {priority}"
    print(message)
|
||
|
||
def enable_confidence_scoring(self, enable: bool = True):
    """
    Announce that confidence scoring has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    if enable:
        label = "启用"
    else:
        label = "禁用"
    print(f"✓ 置信度评分已{label}")
|
||
|
||
def set_confidence_scoring_mode(self, mode: str):
    """
    Validate a confidence-scoring mode and print whether it was accepted.

    Nothing is stored; the method only reports the outcome.

    Args:
        mode: One of 'basic', 'detailed' or 'comprehensive'.
    """
    allowed = ('basic', 'detailed', 'comprehensive')
    if mode not in allowed:
        print(f"✗ 无效的评分模式: {mode}")
        return
    print(f"✓ 置信度评分模式设置为: {mode}")
|
||
|
||
def enable_lattice_generation(self, enable: bool = True):
    """
    Announce that lattice generation has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    print(f"✓ 格子生成已{'启用' if enable else '禁用'}")
|
||
|
||
def get_recognition_lattice(self) -> Dict[str, Any]:
    """
    Return a simulated (empty) recognition lattice.

    Returns:
        A new dict with empty 'nodes' and 'edges' lists and a
        'timestamp' taken from ``time.time()`` at call time.
    """
    # Placeholder lattice: no nodes or edges are ever produced here.
    lattice = {}
    lattice['nodes'] = []
    lattice['edges'] = []
    lattice['timestamp'] = time.time()
    return lattice
|
||
|
||
def enable_audio_segmentation(self, enable: bool = True):
    """
    Announce that audio segmentation has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    if enable:
        label = "启用"
    else:
        label = "禁用"
    print(f"✓ 音频分割已{label}")
|
||
|
||
def set_segmentation_strategy(self, strategy: str):
    """
    Validate a segmentation strategy and print whether it was accepted.

    Nothing is stored; the method only reports the outcome.

    Args:
        strategy: One of 'silence', 'content' or 'hybrid'.
    """
    allowed = ('silence', 'content', 'hybrid')
    if strategy in allowed:
        message = f"✓ 分割策略设置为: {strategy}"
    else:
        message = f"✗ 无效的分割策略: {strategy}"
    print(message)
|
||
|
||
def recognize_with_correction(self, audio_data: np.ndarray,
                              correction_history: List[str] = None) -> Dict[str, Any]:
    """
    Recognize ``audio_data`` and attach the caller's correction history.

    ``_perform_recognition()`` takes no arguments, so
    ``self.audio_buffer`` is temporarily replaced by ``[audio_data]`` and
    restored afterwards, even on failure. Previously ``audio_data`` was
    ignored.
    NOTE(review): this presumes ``_perform_recognition`` reads
    ``self.audio_buffer`` — confirm against its implementation.

    Args:
        audio_data: Audio to recognize. When ``None`` the current
            ``self.audio_buffer`` is used unchanged.
        correction_history: Optional earlier corrections. A non-empty
            list is stored as-is under ``'correction_history'``; it is
            not applied to the recognized text.

    Returns:
        The recognition result, or ``{}`` if any exception was raised
        (the error is printed, not propagated).
    """
    use_given_audio = audio_data is not None
    saved_buffer = self.audio_buffer
    try:
        if use_given_audio:
            self.audio_buffer = [audio_data]

        result = self._perform_recognition()

        # Attach the history only when one was supplied.
        if correction_history:
            result['correction_history'] = correction_history

        return result
    except Exception as e:
        print(f"✗ 带纠错的语音识别失败: {e}")
        return {}
    finally:
        # Always give the live buffer back, on success and on failure.
        if use_given_audio:
            self.audio_buffer = saved_buffer
|
||
|
||
def enable_robust_recognition(self, enable: bool = True):
    """
    Announce that robust recognition has been switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    print(f"✓ 鲁棒识别已{'启用' if enable else '禁用'}")
|
||
|
||
def set_robustness_level(self, level: float):
    """
    Clamp a robustness level to [0.0, 1.0] and print the result.

    The clamped value is only reported; it is not stored.

    Args:
        level: Requested robustness level, nominally 0.0-1.0.
    """
    # Clamp in two steps: cap at 1.0, then floor at 0.0.
    capped = min(1.0, level)
    level = max(0.0, capped)
    print(f"✓ 鲁棒性级别设置为: {level}")
|
||
|
||
def enable_adaptive_noise_cancellation(self, enable: bool = True):
    """
    Announce that adaptive noise cancellation was switched on or off.

    Only a confirmation is printed; no attribute is updated.

    Args:
        enable: Truthy to report "enabled", falsy to report "disabled".
    """
    if enable:
        label = "启用"
    else:
        label = "禁用"
    print(f"✓ 自适应噪声消除已{label}")
|
||
|
||
def set_noise_cancellation_profile(self, profile: str):
    """
    Validate a noise-cancellation profile and print whether it was accepted.

    Nothing is stored; the method only reports the outcome.

    Args:
        profile: One of 'light', 'medium', 'heavy' or 'adaptive'.
    """
    allowed = ('light', 'medium', 'heavy', 'adaptive')
    if profile not in allowed:
        print(f"✗ 无效的配置文件: {profile}")
        return
    print(f"✓ 噪声消除配置文件设置为: {profile}")