EG/plugins/user/server_architecture/loadbalancer/load_balancer.py
2025-12-12 16:16:15 +08:00

793 lines
29 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
负载均衡器模块
负责在多个服务器实例间分配负载
"""
import time
import threading
import random
import hashlib
from typing import Dict, Any, List, Optional
class LoadBalancer:
"""
负载均衡器
负责在多个服务器实例间分配负载
"""
def __init__(self, plugin):
"""
初始化负载均衡器
Args:
plugin: 服务器架构插件实例
"""
self.plugin = plugin
self.enabled = False
self.initialized = False
# 负载均衡配置
self.loadbalancer_config = {
"enable_load_balancing": True,
"balancing_algorithm": "round_robin", # round_robin, least_connections, weighted_round_robin, ip_hash
"health_check_interval": 30.0,
"enable_health_check": True,
"max_retry_attempts": 3,
"fallback_strategy": "random", # random, next_available
"enable_session_stickiness": False,
"session_timeout": 3600, # 1小时
"enable_auto_scaling": False,
"min_servers": 1,
"max_servers": 10
}
# 服务器实例列表
self.server_instances = {}
# 负载均衡状态
self.loadbalancer_state = {
"current_instance_index": 0,
"session_map": {}, # 客户端IP到服务器实例的映射
"last_health_check": 0.0,
"total_requests": 0,
"successful_requests": 0,
"failed_requests": 0
}
# 负载均衡统计
self.loadbalancer_stats = {
"instances_added": 0,
"instances_removed": 0,
"health_checks": 0,
"failed_health_checks": 0,
"requests_routed": 0,
"errors": 0
}
# 线程锁
self.lb_lock = threading.RLock()
# 回调函数
self.loadbalancer_callbacks = {
"instance_added": [],
"instance_removed": [],
"health_check_passed": [],
"health_check_failed": [],
"request_routed": [],
"load_balancer_error": []
}
# 时间戳记录
self.last_health_check = 0.0
self.last_request = 0.0
print("✓ 负载均衡器已创建")
def initialize(self) -> bool:
"""
初始化负载均衡器
Returns:
是否初始化成功
"""
try:
print("正在初始化负载均衡器...")
# 添加本地服务器实例
self.add_server_instance("localhost", {
"host": "127.0.0.1",
"port": self.plugin.server_manager.server_config["port"] if self.plugin.server_manager else 8080,
"weight": 1,
"max_connections": 1000,
"current_connections": 0,
"status": "active",
"last_health_check": time.time(),
"health_status": "healthy"
})
self.initialized = True
print("✓ 负载均衡器初始化完成")
return True
except Exception as e:
print(f"✗ 负载均衡器初始化失败: {e}")
self.loadbalancer_stats["errors"] += 1
import traceback
traceback.print_exc()
return False
def enable(self) -> bool:
"""
启用负载均衡器
Returns:
是否启用成功
"""
try:
if not self.initialized:
print("✗ 负载均衡器未初始化")
return False
self.enabled = True
print("✓ 负载均衡器已启用")
return True
except Exception as e:
print(f"✗ 负载均衡器启用失败: {e}")
self.loadbalancer_stats["errors"] += 1
import traceback
traceback.print_exc()
return False
def disable(self):
"""禁用负载均衡器"""
try:
self.enabled = False
# 清理会话映射
with self.lb_lock:
self.loadbalancer_state["session_map"].clear()
print("✓ 负载均衡器已禁用")
except Exception as e:
print(f"✗ 负载均衡器禁用失败: {e}")
self.loadbalancer_stats["errors"] += 1
import traceback
traceback.print_exc()
def finalize(self):
"""清理负载均衡器资源"""
try:
# 禁用负载均衡器
if self.enabled:
self.disable()
# 清理回调
self.loadbalancer_callbacks.clear()
self.initialized = False
print("✓ 负载均衡器资源已清理")
except Exception as e:
print(f"✗ 负载均衡器资源清理失败: {e}")
import traceback
traceback.print_exc()
def update(self, dt: float):
"""
更新负载均衡器状态
Args:
dt: 时间增量(秒)
"""
try:
if not self.enabled:
return
current_time = time.time()
# 定期执行健康检查
if (self.loadbalancer_config["enable_health_check"] and
current_time - self.last_health_check > self.loadbalancer_config["health_check_interval"]):
self._perform_health_check()
self.last_health_check = current_time
# 清理过期会话
if self.loadbalancer_config["enable_session_stickiness"]:
self._cleanup_expired_sessions(current_time)
except Exception as e:
print(f"✗ 负载均衡器更新失败: {e}")
self.loadbalancer_stats["errors"] += 1
import traceback
traceback.print_exc()
def _perform_health_check(self):
"""执行健康检查"""
try:
with self.lb_lock:
for instance_id, instance_data in self.server_instances.items():
try:
# 执行健康检查(简化实现)
is_healthy = self._check_instance_health(instance_data)
# 更新实例健康状态
instance_data["last_health_check"] = time.time()
instance_data["health_status"] = "healthy" if is_healthy else "unhealthy"
self.loadbalancer_stats["health_checks"] += 1
# 触发健康检查回调
if is_healthy:
self._trigger_loadbalancer_callback("health_check_passed", {
"instance_id": instance_id,
"timestamp": time.time()
})
else:
self.loadbalancer_stats["failed_health_checks"] += 1
self._trigger_loadbalancer_callback("health_check_failed", {
"instance_id": instance_id,
"timestamp": time.time()
})
except Exception as e:
print(f"✗ 实例健康检查失败: {instance_id} - {e}")
self.loadbalancer_stats["errors"] += 1
except Exception as e:
print(f"✗ 健康检查执行失败: {e}")
self.loadbalancer_stats["errors"] += 1
def _check_instance_health(self, instance_data: Dict[str, Any]) -> bool:
"""
检查实例健康状态
Args:
instance_data: 实例数据
Returns:
实例是否健康
"""
try:
# 简化实现:检查实例状态和最后检查时间
if instance_data["status"] != "active":
return False
# 检查连接数是否超过限制
if instance_data["current_connections"] > instance_data["max_connections"]:
return False
# 检查最后健康检查时间
if time.time() - instance_data["last_health_check"] > self.loadbalancer_config["health_check_interval"] * 2:
return False
return True
except Exception as e:
print(f"✗ 实例健康状态检查失败: {e}")
self.loadbalancer_stats["errors"] += 1
return False
def _cleanup_expired_sessions(self, current_time: float):
"""清理过期会话"""
try:
expired_sessions = []
with self.lb_lock:
for client_ip, session_data in self.loadbalancer_state["session_map"].items():
if current_time - session_data["timestamp"] > self.loadbalancer_config["session_timeout"]:
expired_sessions.append(client_ip)
for client_ip in expired_sessions:
with self.lb_lock:
del self.loadbalancer_state["session_map"][client_ip]
except Exception as e:
print(f"✗ 过期会话清理失败: {e}")
self.loadbalancer_stats["errors"] += 1
def add_server_instance(self, instance_id: str, instance_data: Dict[str, Any]) -> bool:
"""
添加服务器实例
Args:
instance_id: 实例ID
instance_data: 实例数据
Returns:
是否添加成功
"""
try:
if not self.enabled:
return False
with self.lb_lock:
self.server_instances[instance_id] = instance_data
self.loadbalancer_stats["instances_added"] += 1
# 触发实例添加回调
self._trigger_loadbalancer_callback("instance_added", {
"instance_id": instance_id,
"instance_data": instance_data,
"timestamp": time.time()
})
print(f"✓ 服务器实例已添加: {instance_id}")
return True
except Exception as e:
print(f"✗ 添加服务器实例失败: {e}")
self.loadbalancer_stats["errors"] += 1
return False
def remove_server_instance(self, instance_id: str) -> bool:
"""
移除服务器实例
Args:
instance_id: 实例ID
Returns:
是否移除成功
"""
try:
if not self.enabled:
return False
with self.lb_lock:
if instance_id in self.server_instances:
del self.server_instances[instance_id]
# 移除相关的会话映射
clients_to_remove = []
for client_ip, session_data in self.loadbalancer_state["session_map"].items():
if session_data["instance_id"] == instance_id:
clients_to_remove.append(client_ip)
for client_ip in clients_to_remove:
del self.loadbalancer_state["session_map"][client_ip]
self.loadbalancer_stats["instances_removed"] += 1
# 触发实例移除回调
self._trigger_loadbalancer_callback("instance_removed", {
"instance_id": instance_id,
"timestamp": time.time()
})
print(f"✓ 服务器实例已移除: {instance_id}")
return True
except Exception as e:
print(f"✗ 移除服务器实例失败: {e}")
self.loadbalancer_stats["errors"] += 1
return False
def get_server_instance(self, client_ip: str = None) -> Optional[Dict[str, Any]]:
"""
获取服务器实例(根据负载均衡算法)
Args:
client_ip: 客户端IP用于会话粘性
Returns:
服务器实例数据或None
"""
try:
if not self.enabled or not self.loadbalancer_config["enable_load_balancing"]:
# 如果未启用负载均衡,返回本地实例
with self.lb_lock:
if "localhost" in self.server_instances:
return self.server_instances["localhost"].copy()
elif self.server_instances:
# 返回第一个实例
first_instance = next(iter(self.server_instances.values()))
return first_instance.copy()
else:
return None
# 根据算法选择实例
algorithm = self.loadbalancer_config["balancing_algorithm"]
with self.lb_lock:
active_instances = {
k: v for k, v in self.server_instances.items()
if v["status"] == "active" and v["health_status"] == "healthy"
}
if not active_instances:
return None
if algorithm == "round_robin":
return self._round_robin_selection(active_instances)
elif algorithm == "least_connections":
return self._least_connections_selection(active_instances)
elif algorithm == "weighted_round_robin":
return self._weighted_round_robin_selection(active_instances)
elif algorithm == "ip_hash" and client_ip:
return self._ip_hash_selection(active_instances, client_ip)
else:
# 默认使用轮询
return self._round_robin_selection(active_instances)
except Exception as e:
print(f"✗ 获取服务器实例失败: {e}")
self.loadbalancer_stats["errors"] += 1
return None
def _round_robin_selection(self, active_instances: Dict[str, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""
轮询选择算法
Args:
active_instances: 活跃实例字典
Returns:
选中的实例数据
"""
try:
instance_ids = list(active_instances.keys())
if not instance_ids:
return None
# 选择下一个实例
selected_index = self.loadbalancer_state["current_instance_index"] % len(instance_ids)
selected_instance_id = instance_ids[selected_index]
# 更新索引
self.loadbalancer_state["current_instance_index"] = (selected_index + 1) % len(instance_ids)
return active_instances[selected_instance_id].copy()
except Exception as e:
print(f"✗ 轮询选择失败: {e}")
self.loadbalancer_stats["errors"] += 1
return None
def _least_connections_selection(self, active_instances: Dict[str, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""
最少连接选择算法
Args:
active_instances: 活跃实例字典
Returns:
选中的实例数据
"""
try:
if not active_instances:
return None
# 选择连接数最少的实例
selected_instance_id = min(
active_instances.keys(),
key=lambda k: active_instances[k]["current_connections"]
)
return active_instances[selected_instance_id].copy()
except Exception as e:
print(f"✗ 最少连接选择失败: {e}")
self.loadbalancer_stats["errors"] += 1
return None
def _weighted_round_robin_selection(self, active_instances: Dict[str, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""
加权轮询选择算法
Args:
active_instances: 活跃实例字典
Returns:
选中的实例数据
"""
try:
if not active_instances:
return None
# 创建加权实例列表
weighted_instances = []
for instance_id, instance_data in active_instances.items():
weight = instance_data.get("weight", 1)
for _ in range(weight):
weighted_instances.append(instance_id)
if not weighted_instances:
return None
# 选择实例
selected_index = self.loadbalancer_state["current_instance_index"] % len(weighted_instances)
selected_instance_id = weighted_instances[selected_index]
# 更新索引
self.loadbalancer_state["current_instance_index"] = (selected_index + 1) % len(weighted_instances)
return active_instances[selected_instance_id].copy()
except Exception as e:
print(f"✗ 加权轮询选择失败: {e}")
self.loadbalancer_stats["errors"] += 1
return None
def _ip_hash_selection(self, active_instances: Dict[str, Dict[str, Any]], client_ip: str) -> Optional[Dict[str, Any]]:
"""
IP哈希选择算法
Args:
active_instances: 活跃实例字典
client_ip: 客户端IP
Returns:
选中的实例数据
"""
try:
if not active_instances or not client_ip:
return None
# 检查会话粘性
if self.loadbalancer_config["enable_session_stickiness"]:
with self.lb_lock:
if client_ip in self.loadbalancer_state["session_map"]:
session_data = self.loadbalancer_state["session_map"][client_ip]
if time.time() - session_data["timestamp"] <= self.loadbalancer_config["session_timeout"]:
instance_id = session_data["instance_id"]
if instance_id in active_instances:
return active_instances[instance_id].copy()
# 使用IP哈希选择实例
instance_ids = list(active_instances.keys())
hash_value = int(hashlib.md5(client_ip.encode()).hexdigest(), 16)
selected_index = hash_value % len(instance_ids)
selected_instance_id = instance_ids[selected_index]
# 更新会话映射
if self.loadbalancer_config["enable_session_stickiness"]:
with self.lb_lock:
self.loadbalancer_state["session_map"][client_ip] = {
"instance_id": selected_instance_id,
"timestamp": time.time()
}
return active_instances[selected_instance_id].copy()
except Exception as e:
print(f"✗ IP哈希选择失败: {e}")
self.loadbalancer_stats["errors"] += 1
return None
def route_request(self, client_ip: str = None) -> Optional[Dict[str, Any]]:
"""
路由请求到合适的服务器实例
Args:
client_ip: 客户端IP
Returns:
服务器实例数据或None
"""
try:
if not self.enabled:
return None
# 获取服务器实例
instance_data = self.get_server_instance(client_ip)
if not instance_data:
return None
self.loadbalancer_stats["requests_routed"] += 1
self.loadbalancer_state["total_requests"] += 1
# 增加实例连接数
with self.lb_lock:
instance_id = instance_data.get("instance_id")
if instance_id and instance_id in self.server_instances:
self.server_instances[instance_id]["current_connections"] += 1
# 触发请求路由回调
self._trigger_loadbalancer_callback("request_routed", {
"instance_id": instance_data.get("instance_id"),
"client_ip": client_ip,
"timestamp": time.time()
})
self.last_request = time.time()
return instance_data
except Exception as e:
print(f"✗ 请求路由失败: {e}")
self.loadbalancer_stats["errors"] += 1
self.loadbalancer_state["failed_requests"] += 1
return None
def release_instance_connection(self, instance_id: str):
"""
释放实例连接
Args:
instance_id: 实例ID
"""
try:
with self.lb_lock:
if instance_id in self.server_instances:
if self.server_instances[instance_id]["current_connections"] > 0:
self.server_instances[instance_id]["current_connections"] -= 1
except Exception as e:
print(f"✗ 释放实例连接失败: {e}")
self.loadbalancer_stats["errors"] += 1
def get_instance_stats(self, instance_id: str) -> Optional[Dict[str, Any]]:
"""
获取实例统计信息
Args:
instance_id: 实例ID
Returns:
实例统计信息或None
"""
try:
with self.lb_lock:
if instance_id in self.server_instances:
return self.server_instances[instance_id].copy()
else:
return None
except Exception as e:
print(f"✗ 获取实例统计信息失败: {e}")
self.loadbalancer_stats["errors"] += 1
return None
def get_all_instances(self) -> Dict[str, Dict[str, Any]]:
"""
获取所有实例信息
Returns:
所有实例信息字典
"""
try:
with self.lb_lock:
return {k: v.copy() for k, v in self.server_instances.items()}
except Exception as e:
print(f"✗ 获取所有实例信息失败: {e}")
self.loadbalancer_stats["errors"] += 1
return {}
def update_instance_status(self, instance_id: str, status: str) -> bool:
"""
更新实例状态
Args:
instance_id: 实例ID
status: 状态active, inactive, maintenance
Returns:
是否更新成功
"""
try:
with self.lb_lock:
if instance_id in self.server_instances:
self.server_instances[instance_id]["status"] = status
return True
else:
return False
except Exception as e:
print(f"✗ 更新实例状态失败: {e}")
self.loadbalancer_stats["errors"] += 1
return False
def get_loadbalancer_stats(self) -> Dict[str, Any]:
"""
获取负载均衡器统计信息
Returns:
负载均衡器统计字典
"""
return {
"state": self.loadbalancer_state.copy(),
"stats": self.loadbalancer_stats.copy(),
"config": self.loadbalancer_config.copy(),
"instances_count": len(self.server_instances)
}
def reset_stats(self):
"""重置负载均衡器统计信息"""
try:
self.loadbalancer_stats = {
"instances_added": 0,
"instances_removed": 0,
"health_checks": 0,
"failed_health_checks": 0,
"requests_routed": 0,
"errors": 0
}
self.loadbalancer_state["total_requests"] = 0
self.loadbalancer_state["successful_requests"] = 0
self.loadbalancer_state["failed_requests"] = 0
print("✓ 负载均衡器统计信息已重置")
except Exception as e:
print(f"✗ 负载均衡器统计信息重置失败: {e}")
def set_loadbalancer_config(self, config: Dict[str, Any]) -> bool:
"""
设置负载均衡器配置
Args:
config: 负载均衡器配置字典
Returns:
是否设置成功
"""
try:
self.loadbalancer_config.update(config)
print(f"✓ 负载均衡器配置已更新: {self.loadbalancer_config}")
return True
except Exception as e:
print(f"✗ 负载均衡器配置设置失败: {e}")
return False
def get_loadbalancer_config(self) -> Dict[str, Any]:
"""
获取负载均衡器配置
Returns:
负载均衡器配置字典
"""
return self.loadbalancer_config.copy()
def _trigger_loadbalancer_callback(self, callback_type: str, data: Dict[str, Any]):
"""
触发负载均衡器回调
Args:
callback_type: 回调类型
data: 回调数据
"""
try:
if callback_type in self.loadbalancer_callbacks:
for callback in self.loadbalancer_callbacks[callback_type]:
try:
callback(data)
except Exception as e:
print(f"✗ 负载均衡器回调执行失败: {callback_type} - {e}")
except Exception as e:
print(f"✗ 负载均衡器回调触发失败: {e}")
def register_loadbalancer_callback(self, callback_type: str, callback: callable):
"""
注册负载均衡器回调
Args:
callback_type: 回调类型
callback: 回调函数
"""
try:
if callback_type in self.loadbalancer_callbacks:
self.loadbalancer_callbacks[callback_type].append(callback)
print(f"✓ 负载均衡器回调已注册: {callback_type}")
else:
print(f"✗ 无效的回调类型: {callback_type}")
except Exception as e:
print(f"✗ 负载均衡器回调注册失败: {e}")
def unregister_loadbalancer_callback(self, callback_type: str, callback: callable):
"""
注销负载均衡器回调
Args:
callback_type: 回调类型
callback: 回调函数
"""
try:
if callback_type in self.loadbalancer_callbacks:
if callback in self.loadbalancer_callbacks[callback_type]:
self.loadbalancer_callbacks[callback_type].remove(callback)
print(f"✓ 负载均衡器回调已注销: {callback_type}")
else:
print(f"✗ 无效的回调类型: {callback_type}")
except Exception as e:
print(f"✗ 负载均衡器回调注销失败: {e}")