EG/plugins/user/voice_control/nlp/nlp_processor.py
2025-12-12 16:16:15 +08:00

1010 lines
34 KiB
Python

"""
自然语言处理模块
负责处理和理解自然语言命令
"""
import re
import threading
import time
from typing import Any, Callable, Dict, List, Optional, Tuple
class NLPProcessor:
    """
    Natural-language processor for the voice-control plugin.

    Converts an utterance into a structured result containing the detected
    language, the best regex-matched intent, substring-matched entities, a
    snapshot of the dialogue context and a keyword-count sentiment estimate.
    Results are cached per (text, requested language) pair.
    """

    # Cache entries older than this many seconds are dropped by _cleanup_cache().
    CACHE_TTL_SECONDS = 300.0
    # Minimum number of seconds between two cache sweeps triggered by update().
    CACHE_CLEANUP_INTERVAL = 60.0
    # Callback types accepted by register_nlp_callback().
    CALLBACK_TYPES = (
        'text_processed',
        'intent_recognized',
        'entity_extracted',
        'context_updated',
        'processing_error',
    )
    # Keyword lists used by _analyze_sentiment().
    # NOTE(review): the empty strings look like single-character Chinese words
    # that were lost before this file reached review -- restore them from the
    # original source. They are ignored when matching.
    _POSITIVE_WORDS = ('', '', '喜欢', '开心', '高兴', 'good', 'great', 'happy', 'like')
    _NEGATIVE_WORDS = ('', '', '讨厌', '生气', '难过', 'bad', 'terrible', 'angry', 'sad')

    def __init__(self, plugin):
        """
        Create the processor with default settings; rule tables stay empty
        until initialize() is called.

        Args:
            plugin: voice-control plugin instance (only stored on the object)
        """
        self.plugin = plugin
        self.enabled = False
        self.initialized = False

        # Language settings
        self.language_config = {
            'primary_language': 'zh-CN',
            'supported_languages': ['zh-CN', 'en-US', 'ja-JP'],
            'enable_multi_language': True,
            'enable_dialect_detection': True
        }

        # Rule tables, populated by initialize()
        self.intents = {}
        self.entities = {}
        self.grammar_rules = {}
        self.vocabularies = {}

        # Dialogue context state
        self.context_manager = {
            'current_context': 'default',
            'context_history': [],
            'context_variables': {},
            'max_context_history': 10
        }

        # Processing options
        self.nlp_config = {
            'enable_intent_recognition': True,
            'enable_entity_extraction': True,
            'enable_sentiment_analysis': True,
            'enable_context_awareness': True,
            'confidence_threshold': 0.6,
            'enable_fallback_processing': True,
            'fallback_threshold': 0.3
        }

        # Counters
        self.nlp_stats = self._new_stats()

        # Result cache: (text, language) -> {'result': dict, 'timestamp': float}
        self.processing_cache = {}
        self.cache_max_size = 1000

        # Listener lists, one per callback type
        self.nlp_callbacks = {name: [] for name in self.CALLBACK_TYPES}

        # Timestamps
        self.last_processing_time = 0.0
        self.last_cache_cleanup = 0.0
        print("✓ 自然语言处理器已创建")

    @staticmethod
    def _new_stats() -> Dict[str, Any]:
        """Return a zeroed statistics dictionary."""
        return {
            'texts_processed': 0,
            'intents_recognized': 0,
            'entities_extracted': 0,
            'processing_errors': 0,
            'fallback_used': 0,
            'average_processing_time': 0.0
        }

    @staticmethod
    def _report_failure(label: str, exc: Exception, show_traceback: bool = False) -> None:
        """
        Print a failure line in the file's "✗ <label>: <error>" format.

        Args:
            label: description of the operation that failed
            exc: the exception that was caught
            show_traceback: also print the active traceback; only meaningful
                when called from inside an ``except`` block
        """
        print(f"✗ {label}: {exc}")
        if show_traceback:
            import traceback
            traceback.print_exc()

    def initialize(self) -> bool:
        """
        Load the intent, entity, grammar and vocabulary tables.

        Returns:
            True on success, False if any table failed to load
        """
        try:
            self._initialize_intents()
            self._initialize_entities()
            self._initialize_grammar_rules()
            self._initialize_vocabularies()
            self.initialized = True
            print("✓ 自然语言处理器初始化完成")
            return True
        except Exception as e:
            self._report_failure("自然语言处理器初始化失败", e, show_traceback=True)
            return False

    def enable(self) -> bool:
        """
        Enable text processing; requires a successful initialize() first.

        Returns:
            True if the processor is now enabled
        """
        try:
            if not self.initialized:
                print("✗ 自然语言处理器未初始化")
                return False
            self.enabled = True
            print("✓ 自然语言处理器已启用")
            return True
        except Exception as e:
            self._report_failure("自然语言处理器启用失败", e, show_traceback=True)
            return False

    def disable(self):
        """Disable text processing; process_text() then returns an error result."""
        try:
            self.enabled = False
            print("✓ 自然语言处理器已禁用")
        except Exception as e:
            self._report_failure("自然语言处理器禁用失败", e, show_traceback=True)

    def finalize(self):
        """Disable the processor and release rule tables, listeners and cache."""
        try:
            self.disable()
            self.intents.clear()
            self.entities.clear()
            self.grammar_rules.clear()
            self.vocabularies.clear()
            # Empty each listener list but keep the type keys: clearing the
            # dict itself would make register_nlp_callback() reject every
            # callback type after a later initialize().
            for listeners in self.nlp_callbacks.values():
                listeners.clear()
            self.processing_cache.clear()
            self.initialized = False
            print("✓ 自然语言处理器资源已清理")
        except Exception as e:
            self._report_failure("自然语言处理器资源清理失败", e, show_traceback=True)

    def update(self, dt: float):
        """
        Periodic tick; sweeps the result cache at most once per
        CACHE_CLEANUP_INTERVAL seconds while the processor is enabled.

        Args:
            dt: time delta since the previous tick (not used by this method)
        """
        try:
            if not self.enabled:
                return
            current_time = time.time()
            self.last_processing_time = current_time
            if current_time - self.last_cache_cleanup > self.CACHE_CLEANUP_INTERVAL:
                self._cleanup_cache()
                self.last_cache_cleanup = current_time
        except Exception as e:
            self._report_failure("自然语言处理器更新失败", e, show_traceback=True)

    def _initialize_intents(self):
        """
        Load the intent table.

        Each intent has a display name, a description, a list of regex
        patterns and a list of action names. Errors propagate to initialize().
        """
        self.intents = {
            'navigation': {
                'name': '导航',
                'description': '移动控制相关意图',
                'patterns': [
                    r'(向前|往前|前进).*',
                    r'(向后|往后|后退).*',
                    r'(向左|左转).*',
                    r'(向右|右转).*',
                    r'(停止|停).*'
                ],
                'actions': ['move_forward', 'move_backward', 'turn_left', 'turn_right', 'stop']
            },
            'system_control': {
                'name': '系统控制',
                'description': '系统控制相关意图',
                'patterns': [
                    r'(打开|开启).*灯.*',
                    r'(关闭|关掉).*灯.*',
                    r'(增加|调高|提高).*音量.*',
                    r'(减少|调低|降低).*音量.*'
                ],
                'actions': ['toggle_lights', 'adjust_volume']
            },
            'entertainment': {
                'name': '娱乐控制',
                'description': '娱乐控制相关意图',
                'patterns': [
                    r'(播放|开始播放).*音乐.*',
                    r'(暂停|停止播放).*音乐.*',
                    r'(下一首|下一首歌).*',
                    r'(上一首|上一首歌).*'
                ],
                'actions': ['play_music', 'pause_music', 'next_track', 'previous_track']
            },
            'interaction': {
                'name': '交互',
                'description': '日常交互相关意图',
                'patterns': [
                    r'(你好|您好|hello).*',
                    r'(谢谢|感谢).*',
                    r'(再见|拜拜|bye).*',
                    r'(帮助|help).*',
                    r'(关于|信息).*'
                ],
                'actions': ['greet', 'thank', 'goodbye', 'help', 'about']
            },
            'query': {
                'name': '查询',
                'description': '信息查询相关意图',
                'patterns': [
                    r'(时间|几点).*',
                    r'(日期|今天).*',
                    r'(天气|气温).*',
                    r'(状态|情况).*'
                ],
                'actions': ['query_time', 'query_date', 'query_weather', 'query_status']
            }
        }
        print("✓ 意图识别初始化完成")

    def _initialize_entities(self):
        """
        Load the entity table (type -> display name and literal values).

        Errors propagate to initialize().
        """
        # NOTE(review): the empty strings below look like single-character
        # Chinese values lost before this file reached review (each list has
        # English counterparts) -- restore them from the original source.
        # _extract_entities() skips empty values.
        self.entities = {
            'direction': {
                'name': '方向',
                'values': ['', '', '', '', 'forward', 'backward', 'left', 'right']
            },
            'object': {
                'name': '对象',
                'values': ['', '音乐', '音量', 'light', 'music', 'volume']
            },
            'action': {
                'name': '动作',
                'values': ['打开', '关闭', '增加', '减少', '播放', '暂停', 'open', 'close', 'increase', 'decrease', 'play', 'pause']
            },
            'time_unit': {
                'name': '时间单位',
                'values': ['', '分钟', '小时', 'second', 'minute', 'hour']
            }
        }
        print("✓ 实体识别初始化完成")

    def _initialize_grammar_rules(self):
        """
        Load the descriptive grammar-rule table.

        Nothing in this class reads the rules back; they are only stored.
        Errors propagate to initialize().
        """
        self.grammar_rules = {
            'command_structure': {
                'name': '命令结构',
                'rules': [
                    '动词+对象',
                    '对象+动词',
                    '动词+形容词+对象',
                    '请+动词+对象'
                ]
            },
            'query_structure': {
                'name': '查询结构',
                'rules': [
                    '疑问词+动词',
                    '疑问词+名词',
                    '动词+疑问词'
                ]
            }
        }
        print("✓ 语法规则初始化完成")

    def _initialize_vocabularies(self):
        """
        Load the per-language word lists exposed through get_vocabularies().

        Errors propagate to initialize().
        """
        # NOTE(review): empty strings are presumably lost single-character
        # words -- confirm against the original source.
        self.vocabularies = {
            'zh-CN': {
                'greetings': ['你好', '您好', '早上好', '下午好', '晚上好'],
                'affirmatives': ['', '', '', '可以', ''],
                'negatives': ['', '', '不要', '不行', '不可以'],
                'confirmations': ['确定', '确认', '好的', '明白'],
                'questions': ['什么', '怎么', '为什么', '何时', '哪里']
            },
            'en-US': {
                'greetings': ['hello', 'hi', 'good morning', 'good afternoon', 'good evening'],
                'affirmatives': ['yes', 'yeah', 'yep', 'okay', 'ok', 'sure'],
                'negatives': ['no', 'nope', 'not'],
                'confirmations': ['confirm', 'okay', 'alright', 'got it'],
                'questions': ['what', 'how', 'why', 'when', 'where']
            }
        }
        print("✓ 词汇表初始化完成")

    def _cleanup_cache(self):
        """
        Drop expired cache entries, then trim the cache to its size bound.

        When the cache is over ``cache_max_size`` the oldest entries are
        removed: at least half the bound, and always enough to get back
        under the bound.
        """
        try:
            current_time = time.time()
            expired_keys = [
                key for key, entry in self.processing_cache.items()
                if current_time - entry['timestamp'] > self.CACHE_TTL_SECONDS
            ]
            for key in expired_keys:
                del self.processing_cache[key]

            overflow = len(self.processing_cache) - self.cache_max_size
            if overflow > 0:
                drop_count = max(self.cache_max_size // 2, overflow)
                oldest_first = sorted(
                    self.processing_cache,
                    key=lambda key: self.processing_cache[key]['timestamp']
                )
                for key in oldest_first[:drop_count]:
                    del self.processing_cache[key]
        except Exception as e:
            self._report_failure("缓存清理失败", e)

    def process_text(self, text: str, language: Optional[str] = None) -> Dict[str, Any]:
        """
        Run the full pipeline on one utterance.

        Args:
            text: raw utterance
            language: language code; defaults to the configured primary
                language. When ``enable_dialect_detection`` is on, the
                language detected from the raw text replaces it.

        Returns:
            Result dictionary with keys ``text``, ``processed_text``,
            ``language``, ``intent``, ``entities``, ``context``,
            ``sentiment``, ``timestamp`` and ``processing_time``. On failure
            the dictionary built by _create_error_result() is returned
            instead (it carries an extra ``error`` key). A cache hit returns
            the previously built dictionary object without firing callbacks.
        """
        try:
            if not self.enabled:
                print("✗ 自然语言处理器未启用")
                return self._create_error_result("NLP processor not enabled")
            if not isinstance(text, str):
                # Handled by the except branch below so that the error
                # counter and the 'processing_error' callback stay consistent.
                raise TypeError(f"text must be a str, got {type(text).__name__}")

            start_time = time.time()
            if language is None:
                language = self.language_config['primary_language']

            # A tuple key cannot collide the way "text_language" strings can.
            cache_key = (text, language)
            cached_entry = self.processing_cache.get(cache_key)
            if cached_entry is not None:
                cached_entry['timestamp'] = time.time()
                self.nlp_stats['texts_processed'] += 1
                return cached_entry['result']

            # Detect on the raw text: language-specific preprocessing removes
            # characters outside the assumed language, so running it first
            # with a wrong language would erase the evidence.
            if self.language_config['enable_dialect_detection']:
                language = self._detect_language(text)
            processed_text = self._preprocess_text(text, language)

            intent_result = self._recognize_intent(processed_text, language)
            entity_result = self._extract_entities(processed_text, language)
            context_result = self._analyze_context(processed_text, language)
            sentiment_result = self._analyze_sentiment(processed_text, language)

            finished_at = time.time()
            result = {
                'text': text,
                'processed_text': processed_text,
                'language': language,
                'intent': intent_result,
                'entities': entity_result,
                'context': context_result,
                'sentiment': sentiment_result,
                'timestamp': finished_at,
                'processing_time': finished_at - start_time
            }

            self.processing_cache[cache_key] = {
                'result': result,
                'timestamp': finished_at
            }
            # update() may never be called, so enforce the bound here as well.
            if len(self.processing_cache) > self.cache_max_size:
                self._cleanup_cache()

            stats = self.nlp_stats
            stats['texts_processed'] += 1
            # Same ">=" comparison as _recognize_intent(), so the counter and
            # the 'intent_recognized' callback agree at the threshold.
            if (intent_result['name'] not in ('unknown', 'error') and
                    intent_result['confidence'] >= self.nlp_config['confidence_threshold']):
                stats['intents_recognized'] += 1
            stats['entities_extracted'] += len(entity_result.get('entities', []))
            processed_count = stats['texts_processed']
            stats['average_processing_time'] = (
                (stats['average_processing_time'] * (processed_count - 1) +
                 result['processing_time']) / processed_count
            )

            self._trigger_nlp_callback('text_processed', result)
            return result
        except Exception as e:
            self._report_failure("文本处理失败", e, show_traceback=True)
            self.nlp_stats['processing_errors'] += 1
            self._trigger_nlp_callback('processing_error', {
                'text': text,
                'error': str(e),
                'timestamp': time.time()
            })
            return self._create_error_result(str(e))

    def _preprocess_text(self, text: str, language: str) -> str:
        """
        Normalise an utterance before matching.

        Lower-cases English text, removes characters outside the allowed set
        (CJK ideographs U+4E00-U+9FA5 are kept only for ``zh-CN``) and
        collapses whitespace runs to a single space.

        Args:
            text: raw utterance
            language: language code that selects the allowed character set

        Returns:
            The normalised text; on error, the text as far as it was processed
        """
        try:
            if language == 'en-US':
                text = text.lower()
            if language == 'zh-CN':
                text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s\.,!?;:]', '', text)
            else:
                text = re.sub(r'[^a-zA-Z0-9\s\.,!?;:]', '', text)
            # Collapse whitespace last so that removed characters cannot
            # leave double spaces behind.
            text = re.sub(r'\s+', ' ', text).strip()
            return text
        except Exception as e:
            self._report_failure("文本预处理失败", e)
            return text

    def _detect_language(self, text: str) -> str:
        """
        Guess the language from character counts.

        Args:
            text: text to inspect

        Returns:
            'zh-CN' when CJK ideographs outnumber ASCII letters, otherwise
            'en-US' (ties included); the primary language on error
        """
        try:
            chinese_chars = len(re.findall(r'[\u4e00-\u9fa5]', text))
            english_chars = len(re.findall(r'[a-zA-Z]', text))
            return 'zh-CN' if chinese_chars > english_chars else 'en-US'
        except Exception as e:
            self._report_failure("语言检测失败", e)
            return self.language_config['primary_language']

    def _recognize_intent(self, text: str, language: str) -> Dict[str, Any]:
        """
        Pick the intent whose pattern covers the largest share of the text.

        Args:
            text: preprocessed text
            language: language code (not used by the matcher)

        Returns:
            Intent dictionary with ``name``, ``display_name``,
            ``description``, ``confidence`` and ``actions``. ``name`` is
            'unknown' when no pattern reaches the applicable threshold and
            'error' when recognition itself failed.
        """
        try:
            best_intent = None
            best_confidence = 0.0
            for intent_key, intent_data in self.intents.items():
                for pattern in intent_data.get('patterns', []):
                    # Returns 0.0 when the pattern does not match.
                    confidence = self._calculate_match_confidence(text, pattern)
                    if confidence > best_confidence:
                        best_confidence = confidence
                        best_intent = {
                            'name': intent_key,
                            'display_name': intent_data['name'],
                            'description': intent_data['description'],
                            'confidence': confidence,
                            'actions': intent_data.get('actions', [])
                        }

            if best_intent is not None:
                if best_confidence >= self.nlp_config['confidence_threshold']:
                    self._trigger_nlp_callback('intent_recognized', best_intent)
                    return best_intent
                if (best_confidence >= self.nlp_config['fallback_threshold'] and
                        self.nlp_config['enable_fallback_processing']):
                    # Low-confidence match accepted through the fallback path.
                    self.nlp_stats['fallback_used'] += 1
                    return best_intent

            return {
                'name': 'unknown',
                'display_name': '未知意图',
                'description': '无法识别的意图',
                'confidence': 0.0,
                'actions': []
            }
        except Exception as e:
            self._report_failure("意图识别失败", e)
            return {
                'name': 'error',
                'display_name': '处理错误',
                'description': f'意图识别错误: {e}',
                'confidence': 0.0,
                'actions': []
            }

    def _calculate_match_confidence(self, text: str, pattern: str) -> float:
        """
        Score a pattern as matched length divided by text length.

        Args:
            text: text to search
            pattern: regular expression, searched case-insensitively

        Returns:
            Value in 0.0-1.0; 0.0 when there is no match, the text is empty
            or the search fails
        """
        try:
            match = re.search(pattern, text, re.IGNORECASE)
            if match and text:
                return min(1.0, len(match.group(0)) / len(text))
            return 0.0
        except Exception as e:
            self._report_failure("匹配置信度计算失败", e)
            return 0.0

    def _extract_entities(self, text: str, language: str) -> Dict[str, Any]:
        """
        Find every configured entity value that occurs in the text.

        Args:
            text: preprocessed text
            language: language code (not used by the matcher)

        Returns:
            ``{'entities': [...], 'count': n}``; each entity has ``type``,
            ``display_name``, ``value`` and ``position`` (index of the first
            occurrence)
        """
        try:
            entities = []
            for entity_key, entity_data in self.entities.items():
                for value in entity_data.get('values', []):
                    # An empty string is a substring of every text; skip it.
                    if value and value in text:
                        entities.append({
                            'type': entity_key,
                            'display_name': entity_data['name'],
                            'value': value,
                            'position': text.find(value)
                        })
            for entity in entities:
                self._trigger_nlp_callback('entity_extracted', entity)
            return {
                'entities': entities,
                'count': len(entities)
            }
        except Exception as e:
            self._report_failure("实体提取失败", e)
            return {
                'entities': [],
                'count': 0
            }

    def _analyze_context(self, text: str, language: str) -> Dict[str, Any]:
        """
        Report the current dialogue context.

        Relevance detection is not implemented yet, so ``context_relevant``
        is always False.

        Args:
            text: preprocessed text (currently unused)
            language: language code (currently unused)

        Returns:
            Dictionary with ``current_context``, ``context_relevant`` and a
            copy of ``context_variables``
        """
        try:
            return {
                'current_context': self.context_manager['current_context'],
                'context_relevant': False,
                'context_variables': self.context_manager['context_variables'].copy()
            }
        except Exception as e:
            self._report_failure("上下文分析失败", e)
            return {
                'current_context': 'default',
                'context_relevant': False,
                'context_variables': {}
            }

    def _analyze_sentiment(self, text: str, language: str) -> Dict[str, Any]:
        """
        Estimate sentiment by counting which positive/negative keywords occur.

        Args:
            text: preprocessed text
            language: language code (not used)

        Returns:
            Dictionary with ``sentiment`` ('positive', 'negative' or
            'neutral'), ``confidence``, ``positive_words`` and
            ``negative_words``
        """
        try:
            # Empty keywords are skipped: '' is a substring of every text.
            positive_count = sum(1 for word in self._POSITIVE_WORDS if word and word in text)
            negative_count = sum(1 for word in self._NEGATIVE_WORDS if word and word in text)
            total = positive_count + negative_count + 1
            if positive_count > negative_count:
                sentiment, confidence = 'positive', positive_count / total
            elif negative_count > positive_count:
                sentiment, confidence = 'negative', negative_count / total
            else:
                sentiment, confidence = 'neutral', 0.5
            return {
                'sentiment': sentiment,
                'confidence': confidence,
                'positive_words': positive_count,
                'negative_words': negative_count
            }
        except Exception as e:
            self._report_failure("情感分析失败", e)
            return {
                'sentiment': 'neutral',
                'confidence': 0.0,
                'positive_words': 0,
                'negative_words': 0
            }

    def _create_error_result(self, error_message: str) -> Dict[str, Any]:
        """
        Build a result dictionary describing a failed processing attempt.

        Args:
            error_message: human-readable error text

        Returns:
            Dictionary shaped like a process_text() result, with neutral
            defaults and an additional ``error`` key
        """
        return {
            'text': '',
            'processed_text': '',
            'language': self.language_config['primary_language'],
            'intent': {
                'name': 'error',
                'display_name': '处理错误',
                'description': error_message,
                'confidence': 0.0,
                'actions': []
            },
            'entities': {
                'entities': [],
                'count': 0
            },
            'context': {
                'current_context': 'default',
                'context_relevant': False,
                'context_variables': {}
            },
            'sentiment': {
                'sentiment': 'neutral',
                'confidence': 0.0,
                'positive_words': 0,
                'negative_words': 0
            },
            'timestamp': time.time(),
            'processing_time': 0.0,
            'error': error_message
        }

    def update_context(self, context_name: str, variables: Optional[Dict[str, Any]] = None) -> bool:
        """
        Switch to a new dialogue context, archiving the current one.

        Args:
            context_name: name of the new context
            variables: variables of the new context; a copy is stored, and
                None or an empty mapping yields an empty dictionary

        Returns:
            True on success
        """
        try:
            # Copy (and thereby validate) before touching any state, so a bad
            # argument leaves the context untouched and later caller-side
            # mutation cannot leak into the stored context.
            new_variables = dict(variables) if variables else {}

            manager = self.context_manager
            history = manager['context_history']
            history.append({
                'context': manager['current_context'],
                'variables': manager['context_variables'].copy(),
                'timestamp': time.time()
            })
            # A loop also copes with max_context_history being lowered.
            while len(history) > manager['max_context_history']:
                history.pop(0)

            manager['current_context'] = context_name
            manager['context_variables'] = new_variables

            self._trigger_nlp_callback('context_updated', {
                'context': context_name,
                'variables': variables
            })
            print(f"✓ 上下文已更新为: {context_name}")
            return True
        except Exception as e:
            self._report_failure("上下文更新失败", e)
            return False

    def get_current_context(self) -> Dict[str, Any]:
        """
        Return the active context.

        Returns:
            Dictionary with ``context``, a copy of ``variables`` and a
            shallow copy of ``history``
        """
        return {
            'context': self.context_manager['current_context'],
            'variables': self.context_manager['context_variables'].copy(),
            'history': self.context_manager['context_history'].copy()
        }

    def clear_context(self) -> bool:
        """
        Reset the context to 'default' and drop variables and history.

        Returns:
            True on success
        """
        try:
            self.context_manager['current_context'] = 'default'
            self.context_manager['context_variables'] = {}
            self.context_manager['context_history'] = []
            print("✓ 上下文已清空")
            return True
        except Exception as e:
            self._report_failure("上下文清空失败", e)
            return False

    def get_intents(self) -> Dict[str, Any]:
        """
        Returns:
            Shallow copy of the intent table (nested definitions are shared)
        """
        return self.intents.copy()

    def get_entities(self) -> Dict[str, Any]:
        """
        Returns:
            Shallow copy of the entity table (nested definitions are shared)
        """
        return self.entities.copy()

    def get_vocabularies(self) -> Dict[str, Any]:
        """
        Returns:
            Shallow copy of the vocabulary table (nested lists are shared)
        """
        return self.vocabularies.copy()

    def set_language_config(self, config: Dict[str, Any]) -> bool:
        """
        Merge ``config`` into the language settings.

        Args:
            config: settings to overwrite

        Returns:
            True on success, False if ``config`` could not be merged
        """
        try:
            self.language_config.update(config)
            print(f"✓ 语言配置已更新: {self.language_config}")
            return True
        except Exception as e:
            self._report_failure("语言配置设置失败", e)
            return False

    def get_language_config(self) -> Dict[str, Any]:
        """
        Returns:
            Shallow copy of the language settings
        """
        return self.language_config.copy()

    def set_nlp_config(self, config: Dict[str, Any]) -> bool:
        """
        Merge ``config`` into the processing options.

        Args:
            config: options to overwrite

        Returns:
            True on success, False if ``config`` could not be merged
        """
        try:
            self.nlp_config.update(config)
            print(f"✓ NLP配置已更新: {self.nlp_config}")
            return True
        except Exception as e:
            self._report_failure("NLP配置设置失败", e)
            return False

    def get_nlp_config(self) -> Dict[str, Any]:
        """
        Returns:
            Shallow copy of the processing options
        """
        return self.nlp_config.copy()

    def _trigger_nlp_callback(self, callback_type: str, data: Dict[str, Any]):
        """
        Call every listener registered for ``callback_type``.

        A failing listener is reported and does not stop the others.

        Args:
            callback_type: one of CALLBACK_TYPES; unknown types are ignored
            data: payload passed to each listener
        """
        # Iterate over a snapshot so a listener may unregister itself (or
        # others) without causing its neighbours to be skipped.
        for callback in list(self.nlp_callbacks.get(callback_type, ())):
            try:
                callback(data)
            except Exception as e:
                self._report_failure("NLP回调执行失败", e)

    def register_nlp_callback(self, callback_type: str, callback: Callable[[Dict[str, Any]], None]):
        """
        Register a listener.

        Args:
            callback_type: one of CALLBACK_TYPES
            callback: function called with the event payload
        """
        try:
            if callback_type in self.nlp_callbacks:
                self.nlp_callbacks[callback_type].append(callback)
                print(f"✓ NLP回调已注册: {callback_type}")
            else:
                print(f"✗ 无效的回调类型: {callback_type}")
        except Exception as e:
            self._report_failure("NLP回调注册失败", e)

    def unregister_nlp_callback(self, callback_type: str, callback: Callable[[Dict[str, Any]], None]):
        """
        Remove a previously registered listener; unknown pairs are ignored.

        Args:
            callback_type: type the listener was registered for
            callback: the listener to remove
        """
        try:
            listeners = self.nlp_callbacks.get(callback_type)
            if listeners is not None and callback in listeners:
                listeners.remove(callback)
                print(f"✓ NLP回调已注销: {callback_type}")
        except Exception as e:
            self._report_failure("NLP回调注销失败", e)

    def get_stats(self) -> Dict[str, Any]:
        """
        Returns:
            Copy of the statistics counters
        """
        return self.nlp_stats.copy()

    def reset_stats(self):
        """Reset every statistics counter to zero."""
        try:
            self.nlp_stats = self._new_stats()
            print("✓ NLP处理器统计信息已重置")
        except Exception as e:
            self._report_failure("NLP处理器统计信息重置失败", e)