825 lines
31 KiB
Python
825 lines
31 KiB
Python
"""
|
||
通信监控模块
|
||
实时监控通信性能和记录日志
|
||
"""
|
||
|
||
import time
|
||
import threading
|
||
import json
|
||
import os
|
||
from typing import Dict, Any, List, Optional
|
||
from collections import deque
|
||
|
||
class CommunicationMonitor:
|
||
"""
|
||
通信监控器
|
||
实时监控通信性能和记录日志
|
||
"""
|
||
|
||
def __init__(self, plugin):
|
||
"""
|
||
初始化通信监控器
|
||
|
||
Args:
|
||
plugin: 实时通信插件实例
|
||
"""
|
||
self.plugin = plugin
|
||
self.enabled = False
|
||
self.initialized = False
|
||
|
||
# 监控配置
|
||
self.monitor_config = {
|
||
"enable_monitoring": True,
|
||
"monitoring_interval": 5.0,
|
||
"log_level": "INFO",
|
||
"enable_file_logging": True,
|
||
"log_file_path": "/home/hello/EG/plugins/user/realtime_communication/logs/communication_monitor.log",
|
||
"max_log_file_size": 10485760, # 10MB
|
||
"enable_performance_monitoring": True,
|
||
"enable_alerts": True,
|
||
"alert_thresholds": {
|
||
"cpu_usage": 80.0,
|
||
"memory_usage": 85.0,
|
||
"network_latency": 100.0,
|
||
"error_rate": 5.0,
|
||
"message_queue_size": 1000
|
||
},
|
||
"enable_metrics_collection": True,
|
||
"metrics_retention_period": 3600, # 1小时
|
||
"enable_realtime_dashboard": True
|
||
}
|
||
|
||
# 性能指标存储
|
||
self.performance_metrics = {
|
||
"cpu_usage": deque(maxlen=1000),
|
||
"memory_usage": deque(maxlen=1000),
|
||
"network_in": deque(maxlen=1000),
|
||
"network_out": deque(maxlen=1000),
|
||
"active_connections": deque(maxlen=1000),
|
||
"messages_per_second": deque(maxlen=1000),
|
||
"message_queue_size": deque(maxlen=1000),
|
||
"error_rate": deque(maxlen=1000)
|
||
}
|
||
|
||
# 监控状态
|
||
self.monitor_state = {
|
||
"last_monitoring_update": 0.0,
|
||
"last_performance_check": 0.0,
|
||
"total_logs": 0,
|
||
"total_alerts": 0,
|
||
"active_alerts": 0
|
||
}
|
||
|
||
# 警报系统
|
||
self.active_alerts = {}
|
||
self.alert_history = deque(maxlen=1000)
|
||
|
||
# 日志文件
|
||
self.log_file = None
|
||
self.log_file_size = 0
|
||
|
||
# 监控统计
|
||
self.monitor_stats = {
|
||
"metrics_collected": 0,
|
||
"logs_written": 0,
|
||
"alerts_generated": 0,
|
||
"alerts_resolved": 0,
|
||
"performance_checks": 0,
|
||
"monitor_errors": 0
|
||
}
|
||
|
||
# 线程锁
|
||
self.monitor_lock = threading.RLock()
|
||
|
||
# 回调函数
|
||
self.monitor_callbacks = {
|
||
"metric_collected": [],
|
||
"alert_triggered": [],
|
||
"alert_resolved": [],
|
||
"log_entry": [],
|
||
"performance_degraded": []
|
||
}
|
||
|
||
# 监控线程
|
||
self.monitor_thread = None
|
||
self.monitor_thread_running = False
|
||
|
||
# 时间戳记录
|
||
self.last_log_write = 0.0
|
||
self.last_metric_collection = 0.0
|
||
|
||
print("✓ 通信监控器已创建")
|
||
|
||
def initialize(self) -> bool:
|
||
"""
|
||
初始化通信监控器
|
||
|
||
Returns:
|
||
是否初始化成功
|
||
"""
|
||
try:
|
||
print("正在初始化通信监控器...")
|
||
|
||
# 创建日志目录
|
||
log_dir = os.path.dirname(self.monitor_config["log_file_path"])
|
||
if not os.path.exists(log_dir):
|
||
os.makedirs(log_dir)
|
||
|
||
# 初始化日志文件
|
||
if self.monitor_config["enable_file_logging"]:
|
||
self._initialize_log_file()
|
||
|
||
# 启动监控线程
|
||
self._start_monitor_thread()
|
||
|
||
self.initialized = True
|
||
print("✓ 通信监控器初始化完成")
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"✗ 通信监控器初始化失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
import traceback
|
||
traceback.print_exc()
|
||
return False
|
||
|
||
def _initialize_log_file(self):
|
||
"""初始化日志文件"""
|
||
try:
|
||
# 检查文件是否存在
|
||
if os.path.exists(self.monitor_config["log_file_path"]):
|
||
self.log_file_size = os.path.getsize(self.monitor_config["log_file_path"])
|
||
|
||
# 打开日志文件
|
||
self.log_file = open(self.monitor_config["log_file_path"], "a", encoding="utf-8")
|
||
print(f"✓ 日志文件已初始化: {self.monitor_config['log_file_path']}")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 日志文件初始化失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _start_monitor_thread(self):
|
||
"""启动监控线程"""
|
||
try:
|
||
self.monitor_thread_running = True
|
||
self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
|
||
self.monitor_thread.start()
|
||
print("✓ 监控线程已启动")
|
||
except Exception as e:
|
||
print(f"✗ 监控线程启动失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _monitor_loop(self):
|
||
"""监控线程循环"""
|
||
try:
|
||
while self.monitor_thread_running:
|
||
try:
|
||
if self.enabled and self.monitor_config["enable_monitoring"]:
|
||
current_time = time.time()
|
||
|
||
# 收集性能指标
|
||
if current_time - self.last_metric_collection >= self.monitor_config["monitoring_interval"]:
|
||
self._collect_performance_metrics()
|
||
self.last_metric_collection = current_time
|
||
|
||
# 检查警报
|
||
self._check_alerts()
|
||
|
||
# 检查日志文件大小
|
||
self._check_log_file_size()
|
||
|
||
# 短暂休眠
|
||
time.sleep(1.0)
|
||
|
||
except Exception as e:
|
||
print(f"✗ 监控循环错误: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
time.sleep(5.0) # 出错时延长休眠
|
||
|
||
except Exception as e:
|
||
print(f"✗ 监控线程失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def enable(self) -> bool:
|
||
"""
|
||
启用通信监控器
|
||
|
||
Returns:
|
||
是否启用成功
|
||
"""
|
||
try:
|
||
if not self.initialized:
|
||
print("✗ 通信监控器未初始化")
|
||
return False
|
||
|
||
self.enabled = True
|
||
print("✓ 通信监控器已启用")
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"✗ 通信监控器启用失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
import traceback
|
||
traceback.print_exc()
|
||
return False
|
||
|
||
def disable(self):
|
||
"""禁用通信监控器"""
|
||
try:
|
||
self.enabled = False
|
||
|
||
# 停止监控线程
|
||
if self.monitor_thread_running:
|
||
self.monitor_thread_running = False
|
||
if self.monitor_thread and self.monitor_thread.is_alive():
|
||
self.monitor_thread.join(timeout=5.0)
|
||
|
||
# 关闭日志文件
|
||
if self.log_file:
|
||
self.log_file.close()
|
||
self.log_file = None
|
||
|
||
print("✓ 通信监控器已禁用")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 通信监控器禁用失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def finalize(self):
|
||
"""清理通信监控器资源"""
|
||
try:
|
||
# 禁用通信监控器
|
||
if self.enabled:
|
||
self.disable()
|
||
|
||
# 清理回调
|
||
self.monitor_callbacks.clear()
|
||
|
||
self.initialized = False
|
||
print("✓ 通信监控器资源已清理")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 通信监控器资源清理失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def update(self, dt: float):
|
||
"""
|
||
更新通信监控器状态
|
||
|
||
Args:
|
||
dt: 时间增量(秒)
|
||
"""
|
||
try:
|
||
if not self.enabled:
|
||
return
|
||
|
||
current_time = time.time()
|
||
self.monitor_state["last_monitoring_update"] = current_time
|
||
|
||
except Exception as e:
|
||
print(f"✗ 通信监控器更新失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def _collect_performance_metrics(self):
|
||
"""收集性能指标"""
|
||
try:
|
||
current_time = time.time()
|
||
metrics = {}
|
||
|
||
# 收集系统资源使用情况
|
||
try:
|
||
import psutil
|
||
|
||
# CPU使用率
|
||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||
metrics["cpu_usage"] = cpu_percent
|
||
self.performance_metrics["cpu_usage"].append((current_time, cpu_percent))
|
||
|
||
# 内存使用率
|
||
memory_info = psutil.virtual_memory()
|
||
memory_percent = memory_info.percent
|
||
metrics["memory_usage"] = memory_percent
|
||
self.performance_metrics["memory_usage"].append((current_time, memory_percent))
|
||
|
||
# 网络使用情况
|
||
net_io = psutil.net_io_counters()
|
||
metrics["network_in"] = net_io.bytes_recv
|
||
metrics["network_out"] = net_io.bytes_sent
|
||
self.performance_metrics["network_in"].append((current_time, net_io.bytes_recv))
|
||
self.performance_metrics["network_out"].append((current_time, net_io.bytes_sent))
|
||
|
||
except ImportError:
|
||
# 如果没有psutil,使用简化的指标
|
||
metrics["cpu_usage"] = 0.0
|
||
metrics["memory_usage"] = 0.0
|
||
metrics["network_in"] = 0
|
||
metrics["network_out"] = 0
|
||
|
||
# 收集服务器指标
|
||
if self.plugin.websocket_server:
|
||
server_stats = self.plugin.websocket_server.get_server_stats()
|
||
server_state = server_stats.get("state", {})
|
||
|
||
# 活跃连接数
|
||
active_connections = server_state.get("current_connections", 0)
|
||
metrics["active_connections"] = active_connections
|
||
self.performance_metrics["active_connections"].append((current_time, active_connections))
|
||
|
||
# 数据传输量
|
||
bytes_sent = server_state.get("bytes_sent", 0)
|
||
bytes_received = server_state.get("bytes_received", 0)
|
||
metrics["bytes_sent"] = bytes_sent
|
||
metrics["bytes_received"] = bytes_received
|
||
|
||
# 收集客户端指标
|
||
if self.plugin.client_manager:
|
||
client_stats = self.plugin.client_manager.get_client_stats()
|
||
client_state = client_stats.get("state", {})
|
||
client_count = client_state.get("current_clients", 0)
|
||
metrics["client_count"] = client_count
|
||
|
||
# 收集消息指标
|
||
if self.plugin.message_router:
|
||
message_stats = self.plugin.message_router.get_message_stats()
|
||
message_state = message_stats.get("state", {})
|
||
message_stats_data = message_stats.get("stats", {})
|
||
|
||
# 消息队列大小
|
||
pending_messages = message_state.get("pending_messages", 0)
|
||
metrics["message_queue_size"] = pending_messages
|
||
self.performance_metrics["message_queue_size"].append((current_time, pending_messages))
|
||
|
||
# 消息处理速率
|
||
messages_routed = message_stats_data.get("messages_routed", 0)
|
||
if hasattr(self, '_last_messages_routed'):
|
||
mps = (messages_routed - self._last_messages_routed) / self.monitor_config["monitoring_interval"]
|
||
metrics["messages_per_second"] = max(0, mps)
|
||
self.performance_metrics["messages_per_second"].append((current_time, metrics["messages_per_second"]))
|
||
else:
|
||
metrics["messages_per_second"] = 0
|
||
self.performance_metrics["messages_per_second"].append((current_time, 0))
|
||
|
||
self._last_messages_routed = messages_routed
|
||
|
||
# 收集房间指标
|
||
if self.plugin.room_manager:
|
||
room_stats = self.plugin.room_manager.get_room_stats()
|
||
room_state = room_stats.get("state", {})
|
||
active_rooms = room_state.get("active_rooms", 0)
|
||
metrics["active_rooms"] = active_rooms
|
||
|
||
# 更新统计
|
||
self.monitor_stats["metrics_collected"] += 1
|
||
self.last_metric_collection = current_time
|
||
|
||
# 触发指标收集回调
|
||
self._trigger_monitor_callback("metric_collected", {
|
||
"metrics": metrics,
|
||
"timestamp": current_time
|
||
})
|
||
|
||
# 记录日志
|
||
if self._should_log("DEBUG"):
|
||
self._write_log("DEBUG", f"性能指标收集: {metrics}")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 性能指标收集失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _check_alerts(self):
|
||
"""检查警报条件"""
|
||
try:
|
||
if not self.monitor_config["enable_alerts"]:
|
||
return
|
||
|
||
current_time = time.time()
|
||
thresholds = self.monitor_config["alert_thresholds"]
|
||
|
||
# 检查CPU使用率
|
||
if self.performance_metrics["cpu_usage"]:
|
||
latest_cpu = self.performance_metrics["cpu_usage"][-1][1]
|
||
if latest_cpu > thresholds["cpu_usage"]:
|
||
self._trigger_alert("high_cpu_usage", f"CPU使用率过高: {latest_cpu:.2f}%", current_time)
|
||
|
||
# 检查内存使用率
|
||
if self.performance_metrics["memory_usage"]:
|
||
latest_memory = self.performance_metrics["memory_usage"][-1][1]
|
||
if latest_memory > thresholds["memory_usage"]:
|
||
self._trigger_alert("high_memory_usage", f"内存使用率过高: {latest_memory:.2f}%", current_time)
|
||
|
||
# 检查消息队列大小
|
||
if self.performance_metrics["message_queue_size"]:
|
||
latest_queue_size = self.performance_metrics["message_queue_size"][-1][1]
|
||
if latest_queue_size > thresholds["message_queue_size"]:
|
||
self._trigger_alert("high_message_queue", f"消息队列过大: {latest_queue_size}", current_time)
|
||
|
||
# 检查错误率
|
||
if self.performance_metrics["error_rate"]:
|
||
latest_error_rate = self.performance_metrics["error_rate"][-1][1]
|
||
if latest_error_rate > thresholds["error_rate"]:
|
||
self._trigger_alert("high_error_rate", f"错误率过高: {latest_error_rate:.2f}%", current_time)
|
||
|
||
# 检查活跃连接数
|
||
if self.performance_metrics["active_connections"]:
|
||
latest_connections = self.performance_metrics["active_connections"][-1][1]
|
||
if self.plugin.websocket_server:
|
||
max_connections = self.plugin.websocket_server.websocket_config.get("max_connections", 1000)
|
||
if latest_connections > max_connections * 0.9: # 90%阈值
|
||
self._trigger_alert("high_connections", f"连接数接近上限: {latest_connections}/{max_connections}", current_time)
|
||
|
||
except Exception as e:
|
||
print(f"✗ 警报检查失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _trigger_alert(self, alert_type: str, message: str, timestamp: float = None):
|
||
"""
|
||
触发警报
|
||
|
||
Args:
|
||
alert_type: 警报类型
|
||
message: 警报消息
|
||
timestamp: 时间戳
|
||
"""
|
||
try:
|
||
if timestamp is None:
|
||
timestamp = time.time()
|
||
|
||
alert_id = f"{alert_type}_{int(timestamp)}"
|
||
|
||
# 检查是否已存在相同类型的活动警报
|
||
with self.monitor_lock:
|
||
if alert_type in self.active_alerts:
|
||
# 更新现有警报
|
||
self.active_alerts[alert_type]["count"] += 1
|
||
self.active_alerts[alert_type]["last_triggered"] = timestamp
|
||
else:
|
||
# 创建新警报
|
||
self.active_alerts[alert_type] = {
|
||
"id": alert_id,
|
||
"type": alert_type,
|
||
"message": message,
|
||
"first_triggered": timestamp,
|
||
"last_triggered": timestamp,
|
||
"count": 1,
|
||
"resolved": False
|
||
}
|
||
|
||
self.monitor_state["active_alerts"] += 1
|
||
self.monitor_stats["alerts_generated"] += 1
|
||
|
||
# 添加到警报历史
|
||
self.alert_history.append({
|
||
"id": alert_id,
|
||
"type": alert_type,
|
||
"message": message,
|
||
"timestamp": timestamp,
|
||
"resolved": False
|
||
})
|
||
|
||
# 触发警报回调
|
||
self._trigger_monitor_callback("alert_triggered", {
|
||
"alert_id": alert_id,
|
||
"alert_type": alert_type,
|
||
"message": message,
|
||
"timestamp": timestamp
|
||
})
|
||
|
||
# 记录日志
|
||
self._write_log("WARNING", f"警报触发 [{alert_type}]: {message}")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 警报触发失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def resolve_alert(self, alert_type: str):
|
||
"""
|
||
解决警报
|
||
|
||
Args:
|
||
alert_type: 警报类型
|
||
"""
|
||
try:
|
||
with self.monitor_lock:
|
||
if alert_type in self.active_alerts:
|
||
alert_data = self.active_alerts[alert_type]
|
||
alert_data["resolved"] = True
|
||
alert_data["resolved_time"] = time.time()
|
||
|
||
self.monitor_state["active_alerts"] -= 1
|
||
self.monitor_stats["alerts_resolved"] += 1
|
||
|
||
# 触发警报解决回调
|
||
self._trigger_monitor_callback("alert_resolved", {
|
||
"alert_type": alert_type,
|
||
"alert_data": alert_data
|
||
})
|
||
|
||
# 从活动警报中移除
|
||
del self.active_alerts[alert_type]
|
||
|
||
# 记录日志
|
||
self._write_log("INFO", f"警报已解决: {alert_type}")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 警报解决失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _check_log_file_size(self):
|
||
"""检查日志文件大小"""
|
||
try:
|
||
if not self.monitor_config["enable_file_logging"] or not self.log_file:
|
||
return
|
||
|
||
# 检查文件大小
|
||
if os.path.exists(self.monitor_config["log_file_path"]):
|
||
current_size = os.path.getsize(self.monitor_config["log_file_path"])
|
||
if current_size > self.monitor_config["max_log_file_size"]:
|
||
# 轮转日志文件
|
||
self._rotate_log_file()
|
||
|
||
except Exception as e:
|
||
print(f"✗ 日志文件大小检查失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _rotate_log_file(self):
|
||
"""轮转日志文件"""
|
||
try:
|
||
if self.log_file:
|
||
self.log_file.close()
|
||
|
||
# 重命名当前日志文件
|
||
timestamp = int(time.time())
|
||
rotated_filename = f"{self.monitor_config['log_file_path']}.{timestamp}"
|
||
os.rename(self.monitor_config["log_file_path"], rotated_filename)
|
||
|
||
# 重新初始化日志文件
|
||
self._initialize_log_file()
|
||
|
||
print(f"✓ 日志文件已轮转: {rotated_filename}")
|
||
|
||
except Exception as e:
|
||
print(f"✗ 日志文件轮转失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _write_log(self, level: str, message: str, extra_data: Dict[str, Any] = None):
|
||
"""
|
||
写入日志
|
||
|
||
Args:
|
||
level: 日志级别
|
||
message: 日志消息
|
||
extra_data: 额外数据
|
||
"""
|
||
try:
|
||
if not self._should_log(level):
|
||
return
|
||
|
||
timestamp = time.time()
|
||
log_entry = {
|
||
"timestamp": timestamp,
|
||
"level": level,
|
||
"message": message,
|
||
"extra_data": extra_data
|
||
}
|
||
|
||
# 控制台输出
|
||
print(f"[{level}] {message}")
|
||
|
||
# 文件日志
|
||
if self.monitor_config["enable_file_logging"] and self.log_file:
|
||
try:
|
||
log_line = json.dumps(log_entry, ensure_ascii=False)
|
||
self.log_file.write(log_line + "\n")
|
||
self.log_file.flush()
|
||
|
||
self.log_file_size += len(log_line) + 1
|
||
self.monitor_stats["logs_written"] += 1
|
||
self.monitor_state["total_logs"] += 1
|
||
|
||
except Exception as e:
|
||
print(f"✗ 文件日志写入失败: {e}")
|
||
|
||
# 触发日志条目回调
|
||
self._trigger_monitor_callback("log_entry", log_entry)
|
||
|
||
self.last_log_write = timestamp
|
||
|
||
except Exception as e:
|
||
print(f"✗ 日志写入失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
|
||
def _should_log(self, level: str) -> bool:
|
||
"""
|
||
检查是否应该记录指定级别的日志
|
||
|
||
Args:
|
||
level: 日志级别
|
||
|
||
Returns:
|
||
是否应该记录日志
|
||
"""
|
||
try:
|
||
log_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
|
||
current_level = self.monitor_config["log_level"]
|
||
|
||
if current_level not in log_levels:
|
||
return True # 默认记录所有日志
|
||
|
||
if level not in log_levels:
|
||
return True # 默认记录未知级别的日志
|
||
|
||
current_index = log_levels.index(current_level)
|
||
level_index = log_levels.index(level)
|
||
|
||
# 只记录等于或高于当前级别的日志
|
||
return level_index >= current_index
|
||
|
||
except Exception as e:
|
||
print(f"✗ 日志级别检查失败: {e}")
|
||
return True
|
||
|
||
def get_performance_metrics(self, metric_name: str = None, limit: int = 100) -> Any:
|
||
"""
|
||
获取性能指标
|
||
|
||
Args:
|
||
metric_name: 指标名称(可选)
|
||
limit: 限制返回的数量
|
||
|
||
Returns:
|
||
性能指标数据
|
||
"""
|
||
try:
|
||
with self.monitor_lock:
|
||
if metric_name:
|
||
if metric_name in self.performance_metrics:
|
||
metrics = list(self.performance_metrics[metric_name])
|
||
return metrics[-limit:] if len(metrics) > limit else metrics
|
||
else:
|
||
return []
|
||
else:
|
||
# 返回所有指标
|
||
result = {}
|
||
for name, metrics in self.performance_metrics.items():
|
||
metric_list = list(metrics)
|
||
result[name] = metric_list[-limit:] if len(metric_list) > limit else metric_list
|
||
return result
|
||
|
||
except Exception as e:
|
||
print(f"✗ 获取性能指标失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
return {}
|
||
|
||
def get_active_alerts(self) -> Dict[str, Any]:
|
||
"""
|
||
获取活动警报
|
||
|
||
Returns:
|
||
活动警报字典
|
||
"""
|
||
try:
|
||
with self.monitor_lock:
|
||
return self.active_alerts.copy()
|
||
except Exception as e:
|
||
print(f"✗ 获取活动警报失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
return {}
|
||
|
||
def get_alert_history(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||
"""
|
||
获取警报历史
|
||
|
||
Args:
|
||
limit: 限制返回的数量
|
||
|
||
Returns:
|
||
警报历史列表
|
||
"""
|
||
try:
|
||
with self.monitor_lock:
|
||
history = list(self.alert_history)
|
||
return history[-limit:] if len(history) > limit else history
|
||
except Exception as e:
|
||
print(f"✗ 获取警报历史失败: {e}")
|
||
self.monitor_stats["monitor_errors"] += 1
|
||
return []
|
||
|
||
def get_monitor_stats(self) -> Dict[str, Any]:
|
||
"""
|
||
获取监控统计信息
|
||
|
||
Returns:
|
||
监控统计字典
|
||
"""
|
||
return {
|
||
"state": self.monitor_state.copy(),
|
||
"stats": self.monitor_stats.copy(),
|
||
"config": self.monitor_config.copy(),
|
||
"active_alerts_count": len(self.active_alerts),
|
||
"alert_history_count": len(self.alert_history)
|
||
}
|
||
|
||
def reset_stats(self):
|
||
"""重置监控统计信息"""
|
||
try:
|
||
self.monitor_stats = {
|
||
"metrics_collected": 0,
|
||
"logs_written": 0,
|
||
"alerts_generated": 0,
|
||
"alerts_resolved": 0,
|
||
"performance_checks": 0,
|
||
"monitor_errors": 0
|
||
}
|
||
|
||
self.monitor_state["total_logs"] = 0
|
||
self.monitor_state["total_alerts"] = 0
|
||
self.monitor_state["active_alerts"] = 0
|
||
|
||
print("✓ 监控统计信息已重置")
|
||
except Exception as e:
|
||
print(f"✗ 监控统计信息重置失败: {e}")
|
||
|
||
def set_monitor_config(self, config: Dict[str, Any]) -> bool:
|
||
"""
|
||
设置监控配置
|
||
|
||
Args:
|
||
config: 监控配置字典
|
||
|
||
Returns:
|
||
是否设置成功
|
||
"""
|
||
try:
|
||
self.monitor_config.update(config)
|
||
print(f"✓ 监控配置已更新: {self.monitor_config}")
|
||
return True
|
||
except Exception as e:
|
||
print(f"✗ 监控配置设置失败: {e}")
|
||
return False
|
||
|
||
def get_monitor_config(self) -> Dict[str, Any]:
|
||
"""
|
||
获取监控配置
|
||
|
||
Returns:
|
||
监控配置字典
|
||
"""
|
||
return self.monitor_config.copy()
|
||
|
||
def _trigger_monitor_callback(self, callback_type: str, data: Dict[str, Any]):
|
||
"""
|
||
触发监控回调
|
||
|
||
Args:
|
||
callback_type: 回调类型
|
||
data: 回调数据
|
||
"""
|
||
try:
|
||
if callback_type in self.monitor_callbacks:
|
||
for callback in self.monitor_callbacks[callback_type]:
|
||
try:
|
||
callback(data)
|
||
except Exception as e:
|
||
print(f"✗ 监控回调执行失败: {callback_type} - {e}")
|
||
except Exception as e:
|
||
print(f"✗ 监控回调触发失败: {e}")
|
||
|
||
def register_monitor_callback(self, callback_type: str, callback: callable):
|
||
"""
|
||
注册监控回调
|
||
|
||
Args:
|
||
callback_type: 回调类型
|
||
callback: 回调函数
|
||
"""
|
||
try:
|
||
if callback_type in self.monitor_callbacks:
|
||
self.monitor_callbacks[callback_type].append(callback)
|
||
print(f"✓ 监控回调已注册: {callback_type}")
|
||
else:
|
||
print(f"✗ 无效的回调类型: {callback_type}")
|
||
except Exception as e:
|
||
print(f"✗ 监控回调注册失败: {e}")
|
||
|
||
def unregister_monitor_callback(self, callback_type: str, callback: callable):
|
||
"""
|
||
注销监控回调
|
||
|
||
Args:
|
||
callback_type: 回调类型
|
||
callback: 回调函数
|
||
"""
|
||
try:
|
||
if callback_type in self.monitor_callbacks:
|
||
if callback in self.monitor_callbacks[callback_type]:
|
||
self.monitor_callbacks[callback_type].remove(callback)
|
||
print(f"✓ 监控回调已注销: {callback_type}")
|
||
else:
|
||
print(f"✗ 无效的回调类型: {callback_type}")
|
||
except Exception as e:
|
||
print(f"✗ 监控回调注销失败: {e}") |