EG/plugins/user/speech_recognition_synthesis/plugin.py
2025-12-12 16:16:15 +08:00

522 lines
16 KiB
Python

"""
语音识别和合成插件
为EG引擎提供语音识别和文本转语音功能
"""
import sys
import os
import time
import threading
from typing import Dict, Any, Callable, List
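# Add this plugin's own directory to sys.path so the sibling packages imported
# below (core, recognition, synthesis, utils) can be resolved.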
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from core.speech_manager import SpeechManager
from recognition.speech_recognizer import SpeechRecognizer
from synthesis.speech_synthesizer import SpeechSynthesizer
from utils.audio_utils import AudioUtils
class SpeechRecognitionSynthesisPlugin:
    """
    Main class of the speech recognition and synthesis plugin.

    Owns a SpeechManager, a SpeechRecognizer, a SpeechSynthesizer and an
    AudioUtils helper, drives their lifecycle
    (initialize -> enable -> update -> disable -> finalize) and forwards the
    public speech API calls to them.
    """

    # Seconds the recognition worker sleeps between polls.
    _WORKER_POLL_INTERVAL = 0.01
    # Upper bound on how long disable() waits for the worker to exit, so a
    # blocked process_audio() call cannot hang the caller forever.
    _WORKER_JOIN_TIMEOUT = 1.0

    def __init__(self, world):
        """
        Create the plugin in a disabled, uninitialized state.

        Args:
            world: EG engine world object; stored but not otherwise used here.
        """
        self.world = world
        self.plugin_name = "speech_recognition_synthesis"
        self.version = "1.0.0"
        self.author = "EG Team"

        # Plugin state flags
        self.enabled = False
        self.initialized = False
        self.is_listening = False

        # Core components, created in initialize()
        self.speech_manager = None
        self.speech_recognizer = None
        self.speech_synthesizer = None
        self.audio_utils = None

        # Configuration parameters
        self.config = {
            'sample_rate': 16000,
            'channels': 1,
            'buffer_size': 1024,
            'language': 'zh-CN',
            'voice': 'default',
            'volume': 1.0,
            'speed': 1.0,
            'pitch': 1.0
        }

        # User-supplied callbacks
        self.speech_recognized_callback = None
        self.speech_synthesis_callback = None
        self.error_callback = None

        # Plugin-level statistics (merged with component stats in get_stats)
        self.stats = {
            'recognized_count': 0,
            'synthesized_count': 0,
            'recognition_errors': 0,
            'synthesis_errors': 0,
            'processing_time': 0.0
        }

        # Thread management
        self.recognition_thread = None
        self.synthesis_thread = None
        self.running = False

        print(f"✓ 语音识别和合成插件 v{self.version} 已创建")

    def initialize(self) -> bool:
        """
        Create the core components and initialize them.

        The manager, recognizer and synthesizer are initialized in that
        order; the first failure aborts the sequence. AudioUtils is only
        constructed here.

        Returns:
            True if every component initialized, False otherwise (including
            when an exception was raised, which is printed with a traceback).
        """
        try:
            # Each component receives the plugin instance as its argument.
            self.speech_manager = SpeechManager(self)
            self.speech_recognizer = SpeechRecognizer(self)
            self.speech_synthesizer = SpeechSynthesizer(self)
            self.audio_utils = AudioUtils(self)

            if not self.speech_manager.initialize():
                print("✗ 语音管理器初始化失败")
                return False
            if not self.speech_recognizer.initialize():
                print("✗ 语音识别器初始化失败")
                return False
            if not self.speech_synthesizer.initialize():
                print("✗ 语音合成器初始化失败")
                return False

            self.initialized = True
            print("✓ 语音识别和合成插件初始化完成")
            return True
        except Exception as e:
            print(f"✗ 插件初始化失败: {e}")
            import traceback
            traceback.print_exc()
            return False

    def enable(self) -> bool:
        """
        Enable the components and start the recognition worker thread.

        Calling enable() on an already enabled plugin is a no-op that only
        makes sure a worker thread is running; it never starts a second one.

        Returns:
            True if the plugin is enabled after the call, False otherwise.
        """
        try:
            if not self.initialized:
                print("✗ 插件未初始化")
                return False

            if self.enabled:
                # Already enabled: do not re-enable components or spawn a
                # duplicate worker polling the same recognizer.
                self._ensure_worker_running()
                return True

            if not self.speech_manager.enable():
                print("✗ 语音管理器启用失败")
                return False
            if not self.speech_recognizer.enable():
                print("✗ 语音识别器启用失败")
                return False
            if not self.speech_synthesizer.enable():
                print("✗ 语音合成器启用失败")
                return False

            # The flags must be set before the worker starts, because the
            # worker loop exits as soon as either one is False.
            self.enabled = True
            self.running = True
            self._ensure_worker_running()

            print("✓ 语音识别和合成插件已启用")
            return True
        except Exception as e:
            print(f"✗ 插件启用失败: {e}")
            import traceback
            traceback.print_exc()
            return False

    def _ensure_worker_running(self):
        """Start the recognition worker thread unless one is already alive."""
        worker = self.recognition_thread
        if worker is not None and worker.is_alive():
            return
        self.recognition_thread = threading.Thread(
            target=self._recognition_worker, daemon=True)
        self.recognition_thread.start()

    def disable(self):
        """
        Disable the plugin: stop the worker thread, then disable components.

        The worker is joined (bounded by _WORKER_JOIN_TIMEOUT) before the
        components are disabled, so that a quick disable()/enable() sequence
        cannot leave the old worker running next to a new one.
        """
        try:
            self.running = False
            self.enabled = False
            # A disabled plugin is not listening; clear the flag so a later
            # enable() does not resume audio processing on its own.
            self.is_listening = False

            worker = self.recognition_thread
            if (worker is not None and worker.is_alive()
                    and worker is not threading.current_thread()):
                # Skipped when called from the worker itself (e.g. from an
                # error callback): a thread cannot join itself.
                worker.join(timeout=self._WORKER_JOIN_TIMEOUT)

            # Components are disabled in reverse order of enabling.
            if self.speech_synthesizer:
                self.speech_synthesizer.disable()
            if self.speech_recognizer:
                self.speech_recognizer.disable()
            if self.speech_manager:
                self.speech_manager.disable()

            print("✓ 语音识别和合成插件已禁用")
        except Exception as e:
            print(f"✗ 插件禁用失败: {e}")
            import traceback
            traceback.print_exc()

    def finalize(self):
        """Disable the plugin, finalize every component and drop references."""
        try:
            self.disable()

            if self.speech_synthesizer:
                self.speech_synthesizer.finalize()
                self.speech_synthesizer = None
            if self.speech_recognizer:
                self.speech_recognizer.finalize()
                self.speech_recognizer = None
            if self.speech_manager:
                self.speech_manager.finalize()
                self.speech_manager = None
            if self.audio_utils:
                self.audio_utils = None

            self.initialized = False
            print("✓ 语音识别和合成插件资源已清理")
        except Exception as e:
            print(f"✗ 插件资源清理失败: {e}")
            import traceback
            traceback.print_exc()

    def update(self, dt: float):
        """
        Forward a per-frame update to every component while enabled.

        Args:
            dt: Time increment passed through to each component's update().
        """
        try:
            if not self.enabled:
                return

            if self.speech_manager:
                self.speech_manager.update(dt)
            if self.speech_recognizer:
                self.speech_recognizer.update(dt)
            if self.speech_synthesizer:
                self.speech_synthesizer.update(dt)
        except Exception as e:
            print(f"✗ 插件更新失败: {e}")
            if self.error_callback:
                self.error_callback(f"插件更新失败: {e}")

    def _recognition_worker(self):
        """
        Body of the recognition worker thread.

        Polls until the plugin is stopped or disabled and calls the
        recognizer's process_audio() while listening is active. An exception
        ends the loop; it is counted in stats['recognition_errors'], printed
        and reported through error_callback.
        """
        try:
            while self.running and self.enabled:
                if self.is_listening and self.speech_recognizer:
                    self.speech_recognizer.process_audio()
                time.sleep(self._WORKER_POLL_INTERVAL)
        except Exception as e:
            print(f"✗ 语音识别工作线程错误: {e}")
            self.stats['recognition_errors'] += 1
            if self.error_callback:
                self.error_callback(f"语音识别工作线程错误: {e}")

    def start_listening(self) -> bool:
        """
        Ask the recognizer to start listening.

        Returns:
            The recognizer's result, or False if the plugin is not enabled,
            has no recognizer, or an exception was raised.
        """
        try:
            if not self.enabled:
                print("✗ 插件未启用")
                return False

            if self.speech_recognizer:
                result = self.speech_recognizer.start_listening()
                if result:
                    self.is_listening = True
                    print("✓ 开始语音监听")
                return result
            return False
        except Exception as e:
            print(f"✗ 开始语音监听失败: {e}")
            if self.error_callback:
                self.error_callback(f"开始语音监听失败: {e}")
            return False

    def stop_listening(self):
        """Ask the recognizer to stop listening and clear the listening flag."""
        try:
            if self.speech_recognizer:
                self.speech_recognizer.stop_listening()
            self.is_listening = False
            print("✓ 停止语音监听")
        except Exception as e:
            print(f"✗ 停止语音监听失败: {e}")
            if self.error_callback:
                self.error_callback(f"停止语音监听失败: {e}")

    def synthesize_speech(self, text: str, callback: Callable = None) -> bool:
        """
        Hand a text over to the synthesizer.

        Args:
            text: Text to synthesize.
            callback: Optional completion callback, passed through to the
                synthesizer unchanged.

        Returns:
            The synthesizer's result, or False if the plugin is not enabled,
            has no synthesizer, or an exception was raised.
        """
        try:
            if not self.enabled:
                print("✗ 插件未启用")
                return False

            if self.speech_synthesizer:
                result = self.speech_synthesizer.synthesize(text, callback)
                if result:
                    self.stats['synthesized_count'] += 1
                    print(f"✓ 开始合成语音: {text[:50]}...")
                return result
            return False
        except Exception as e:
            print(f"✗ 语音合成失败: {e}")
            self.stats['synthesis_errors'] += 1
            if self.error_callback:
                self.error_callback(f"语音合成失败: {e}")
            return False

    def set_speech_recognized_callback(self, callback: Callable):
        """
        Store the callback for recognized speech.

        Args:
            callback: Callback function.
        """
        self.speech_recognized_callback = callback

    def set_speech_synthesis_callback(self, callback: Callable):
        """
        Store the callback for speech synthesis.

        Args:
            callback: Callback function.
        """
        self.speech_synthesis_callback = callback

    def set_error_callback(self, callback: Callable):
        """
        Store the error callback; it is called with a message string.

        Args:
            callback: Callback function.
        """
        self.error_callback = callback

    def set_language(self, language: str):
        """
        Set the language on the config, the recognizer and the synthesizer.

        Args:
            language: Language code (e.g. 'zh-CN', 'en-US').
        """
        self.config['language'] = language
        if self.speech_recognizer:
            self.speech_recognizer.set_language(language)
        if self.speech_synthesizer:
            self.speech_synthesizer.set_language(language)
        print(f"✓ 语言设置为: {language}")

    def set_voice(self, voice: str):
        """
        Set the voice on the config and the synthesizer.

        Args:
            voice: Voice name.
        """
        self.config['voice'] = voice
        if self.speech_synthesizer:
            self.speech_synthesizer.set_voice(voice)
        print(f"✓ 语音设置为: {voice}")

    @staticmethod
    def _clamp(value: float, low: float, high: float) -> float:
        """Return value limited to the closed range [low, high]."""
        return max(low, min(high, value))

    def set_volume(self, volume: float):
        """
        Set the playback volume.

        The value is clamped to 0.0-1.0, and the clamped value is what gets
        stored, forwarded to the synthesizer and printed.

        Args:
            volume: Volume (0.0-1.0).
        """
        clamped = self._clamp(volume, 0.0, 1.0)
        self.config['volume'] = clamped
        if self.speech_synthesizer:
            self.speech_synthesizer.set_volume(clamped)
        print(f"✓ 音量设置为: {clamped}")

    def set_speed(self, speed: float):
        """
        Set the speech rate.

        The value is clamped to 0.5-2.0, and the clamped value is what gets
        stored, forwarded to the synthesizer and printed.

        Args:
            speed: Speech rate (0.5-2.0).
        """
        clamped = self._clamp(speed, 0.5, 2.0)
        self.config['speed'] = clamped
        if self.speech_synthesizer:
            self.speech_synthesizer.set_speed(clamped)
        print(f"✓ 语速设置为: {clamped}")

    def set_pitch(self, pitch: float):
        """
        Set the voice pitch.

        The value is clamped to 0.5-2.0, and the clamped value is what gets
        stored, forwarded to the synthesizer and printed.

        Args:
            pitch: Pitch (0.5-2.0).
        """
        clamped = self._clamp(pitch, 0.5, 2.0)
        self.config['pitch'] = clamped
        if self.speech_synthesizer:
            self.speech_synthesizer.set_pitch(clamped)
        print(f"✓ 音调设置为: {clamped}")

    def get_stats(self) -> Dict[str, Any]:
        """
        Collect statistics from the plugin and its components.

        Starts from a copy of the plugin's own counters and updates it with
        the manager's, recognizer's and synthesizer's stats in that order, so
        a later source overrides an earlier one on a shared key.

        Returns:
            Merged statistics dictionary.
        """
        stats = self.stats.copy()
        if self.speech_manager:
            stats.update(self.speech_manager.get_stats())
        if self.speech_recognizer:
            stats.update(self.speech_recognizer.get_stats())
        if self.speech_synthesizer:
            stats.update(self.speech_synthesizer.get_stats())
        return stats

    def is_speaking(self) -> bool:
        """
        Report whether the synthesizer is currently speaking.

        Returns:
            The synthesizer's answer, or False when there is no synthesizer.
        """
        if self.speech_synthesizer:
            return self.speech_synthesizer.is_speaking()
        return False

    def stop_speaking(self):
        """Ask the synthesizer, if any, to stop speaking."""
        if self.speech_synthesizer:
            self.speech_synthesizer.stop_speaking()
            print("✓ 停止播放语音")

    def get_available_voices(self) -> List[str]:
        """
        Return the voices reported by the synthesizer.

        Returns:
            List of voice names; empty when there is no synthesizer.
        """
        if self.speech_synthesizer:
            return self.speech_synthesizer.get_available_voices()
        return []

    def get_available_languages(self) -> List[str]:
        """
        Return the languages usable by the plugin.

        When both recognizer and synthesizer exist, the result is the
        languages both of them report, without duplicates and in the order
        the recognizer lists them. With only one component, its list is
        returned as-is.

        Returns:
            List of language codes; empty when neither component exists.
        """
        if self.speech_recognizer and self.speech_synthesizer:
            recognizer_langs = self.speech_recognizer.get_available_languages()
            synthesizer_langs = self.speech_synthesizer.get_available_languages()
            supported = set(synthesizer_langs)
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the result is deterministic (a set intersection is not).
            return [lang for lang in dict.fromkeys(recognizer_langs)
                    if lang in supported]
        elif self.speech_recognizer:
            return self.speech_recognizer.get_available_languages()
        elif self.speech_synthesizer:
            return self.speech_synthesizer.get_available_languages()
        return []

    def save_audio_file(self, filename: str, audio_data: bytes) -> bool:
        """
        Save audio data through AudioUtils.

        Args:
            filename: Target file name.
            audio_data: Audio data to write.

        Returns:
            AudioUtils' result, or False when AudioUtils is not available.
        """
        if self.audio_utils:
            return self.audio_utils.save_audio_file(filename, audio_data)
        return False

    def load_audio_file(self, filename: str) -> bytes:
        """
        Load audio data through AudioUtils.

        Args:
            filename: Source file name.

        Returns:
            AudioUtils' result, or b'' when AudioUtils is not available.
        """
        if self.audio_utils:
            return self.audio_utils.load_audio_file(filename)
        return b''
# Plugin registration metadata
def get_plugin_info() -> Dict[str, Any]:
    """
    Return the static metadata describing this plugin.

    Returns:
        Dictionary with the keys 'name', 'version', 'author', 'description'
        and 'dependencies' (an empty list).
    """
    return {
        'name': 'speech_recognition_synthesis',
        'version': '1.0.0',
        'author': 'EG Team',
        'description': '语音识别和合成插件,提供语音识别和文本转语音功能',
        'dependencies': []
    }
# Factory function that creates the plugin instance
def create_plugin(world) -> "SpeechRecognitionSynthesisPlugin":
    """
    Create a new plugin instance.

    Args:
        world: EG engine world object, passed to the plugin constructor.

    Returns:
        A new SpeechRecognitionSynthesisPlugin; initialize() and enable()
        are not called here.
    """
    return SpeechRecognitionSynthesisPlugin(world)