EG/plugins/user/realtime_communication/serialization/data_serializer.py
2025-12-12 16:16:15 +08:00

773 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
数据序列化器模块
将消息序列化为网络数据
"""
import time
import json
import pickle
import zlib
from typing import Dict, Any, List, Optional, Union
class DataSerializer:
"""
数据序列化器
将消息序列化为网络数据
"""
def __init__(self, plugin):
"""
初始化数据序列化器
Args:
plugin: 实时通信插件实例
"""
self.plugin = plugin
self.enabled = False
self.initialized = False
# 序列化配置
self.serializer_config = {
"default_format": "json",
"supported_formats": ["json", "pickle", "binary"],
"enable_compression": True,
"compression_threshold": 1024, # 1KB
"enable_caching": True,
"cache_size": 1000,
"enable_incremental_encoding": True,
"enable_type_preservation": True
}
# 序列化状态
self.serializer_state = {
"cached_objects": 0,
"compressed_objects": 0,
"serialized_objects": 0
}
# 序列化统计
self.serializer_stats = {
"objects_serialized": 0,
"objects_deserialized": 0,
"bytes_serialized": 0,
"bytes_deserialized": 0,
"compression_savings": 0,
"cache_hits": 0,
"cache_misses": 0,
"serializer_errors": 0
}
# 缓存管理
self.serialization_cache = {}
self.cache_timestamps = {}
# 序列化器映射
self.serializers = {
"json": {
"serialize": self._serialize_json,
"deserialize": self._deserialize_json
},
"pickle": {
"serialize": self._serialize_pickle,
"deserialize": self._deserialize_pickle
},
"binary": {
"serialize": self._serialize_binary,
"deserialize": self._deserialize_binary
}
}
# 回调函数
self.serializer_callbacks = {
"object_serialized": [],
"object_deserialized": [],
"compression_applied": [],
"cache_hit": [],
"serializer_error": []
}
# 时间戳记录
self.last_serialization = 0.0
self.last_cache_cleanup = 0.0
print("✓ 数据序列化器已创建")
def initialize(self) -> bool:
"""
初始化数据序列化器
Returns:
是否初始化成功
"""
try:
print("正在初始化数据序列化器...")
self.initialized = True
print("✓ 数据序列化器初始化完成")
return True
except Exception as e:
print(f"✗ 数据序列化器初始化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
import traceback
traceback.print_exc()
return False
def enable(self) -> bool:
"""
启用数据序列化器
Returns:
是否启用成功
"""
try:
if not self.initialized:
print("✗ 数据序列化器未初始化")
return False
self.enabled = True
print("✓ 数据序列化器已启用")
return True
except Exception as e:
print(f"✗ 数据序列化器启用失败: {e}")
self.serializer_stats["serializer_errors"] += 1
import traceback
traceback.print_exc()
return False
def disable(self):
"""禁用数据序列化器"""
try:
self.enabled = False
# 清理缓存
self.serialization_cache.clear()
self.cache_timestamps.clear()
print("✓ 数据序列化器已禁用")
except Exception as e:
print(f"✗ 数据序列化器禁用失败: {e}")
self.serializer_stats["serializer_errors"] += 1
import traceback
traceback.print_exc()
def finalize(self):
"""清理数据序列化器资源"""
try:
# 禁用数据序列化器
if self.enabled:
self.disable()
# 清理回调
self.serializer_callbacks.clear()
self.initialized = False
print("✓ 数据序列化器资源已清理")
except Exception as e:
print(f"✗ 数据序列化器资源清理失败: {e}")
import traceback
traceback.print_exc()
def update(self, dt: float):
"""
更新数据序列化器状态
Args:
dt: 时间增量(秒)
"""
try:
if not self.enabled:
return
current_time = time.time()
# 定期清理缓存
if current_time - self.last_cache_cleanup > 300: # 每5分钟清理一次
self._cleanup_cache(current_time)
self.last_cache_cleanup = current_time
self.last_serialization = current_time
except Exception as e:
print(f"✗ 数据序列化器更新失败: {e}")
self.serializer_stats["serializer_errors"] += 1
import traceback
traceback.print_exc()
def _cleanup_cache(self, current_time: float):
"""清理缓存"""
try:
if not self.serializer_config["enable_caching"]:
return
expired_keys = []
for key, timestamp in self.cache_timestamps.items():
# 缓存对象超过10分钟未使用则清除
if current_time - timestamp > 600:
expired_keys.append(key)
for key in expired_keys:
if key in self.serialization_cache:
del self.serialization_cache[key]
if key in self.cache_timestamps:
del self.cache_timestamps[key]
self.serializer_state["cached_objects"] = len(self.serialization_cache)
except Exception as e:
print(f"✗ 缓存清理失败: {e}")
self.serializer_stats["serializer_errors"] += 1
def serialize(self, obj: Any, format: str = None) -> Optional[bytes]:
"""
序列化对象
Args:
obj: 要序列化的对象
format: 序列化格式
Returns:
序列化后的字节数据或None
"""
try:
if not self.enabled:
return None
# 使用默认格式或指定格式
if format is None:
format = self.serializer_config["default_format"]
# 验证格式支持
if format not in self.serializers:
print(f"✗ 不支持的序列化格式: {format}")
self.serializer_stats["serializer_errors"] += 1
return None
# 检查缓存
cache_key = None
if self.serializer_config["enable_caching"]:
cache_key = self._generate_cache_key(obj, format)
if cache_key in self.serialization_cache:
self.serializer_stats["cache_hits"] += 1
self.cache_timestamps[cache_key] = time.time()
# 触发缓存命中回调
self._trigger_serializer_callback("cache_hit", {
"cache_key": cache_key,
"format": format,
"timestamp": time.time()
})
return self.serialization_cache[cache_key]
else:
self.serializer_stats["cache_misses"] += 1
# 获取序列化器
serializer = self.serializers[format]["serialize"]
# 序列化对象
serialized_data = serializer(obj)
if serialized_data is None:
return None
# 更新统计
self.serializer_stats["objects_serialized"] += 1
self.serializer_stats["bytes_serialized"] += len(serialized_data)
self.serializer_state["serialized_objects"] += 1
# 缓存结果
if (self.serializer_config["enable_caching"] and
cache_key and
len(self.serialization_cache) < self.serializer_config["cache_size"]):
self.serialization_cache[cache_key] = serialized_data
self.cache_timestamps[cache_key] = time.time()
self.serializer_state["cached_objects"] = len(self.serialization_cache)
# 触发对象序列化回调
self._trigger_serializer_callback("object_serialized", {
"format": format,
"data_size": len(serialized_data),
"timestamp": time.time()
})
return serialized_data
except Exception as e:
print(f"✗ 对象序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def deserialize(self, data: bytes, format: str = None) -> Optional[Any]:
"""
反序列化对象
Args:
data: 要反序列化的字节数据
format: 序列化格式
Returns:
反序列化后的对象或None
"""
try:
if not self.enabled:
return None
# 使用默认格式或指定格式
if format is None:
format = self.serializer_config["default_format"]
# 验证格式支持
if format not in self.serializers:
print(f"✗ 不支持的反序列化格式: {format}")
self.serializer_stats["serializer_errors"] += 1
return None
# 获取反序列化器
deserializer = self.serializers[format]["deserialize"]
# 反序列化对象
deserialized_obj = deserializer(data)
if deserialized_obj is None:
return None
# 更新统计
self.serializer_stats["objects_deserialized"] += 1
self.serializer_stats["bytes_deserialized"] += len(data)
# 触发对象反序列化回调
self._trigger_serializer_callback("object_deserialized", {
"format": format,
"data_size": len(data),
"timestamp": time.time()
})
return deserialized_obj
except Exception as e:
print(f"✗ 对象反序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _serialize_json(self, obj: Any) -> Optional[bytes]:
"""
JSON序列化
Args:
obj: 要序列化的对象
Returns:
序列化后的字节数据或None
"""
try:
# 序列化为JSON字符串
json_string = json.dumps(obj, ensure_ascii=False, default=self._json_default)
serialized_data = json_string.encode('utf-8')
# 检查是否需要压缩
if (self.serializer_config["enable_compression"] and
len(serialized_data) > self.serializer_config["compression_threshold"]):
compressed_data = zlib.compress(serialized_data)
if len(compressed_data) < len(serialized_data):
# 使用压缩版本
serialized_data = compressed_data
self.serializer_state["compressed_objects"] += 1
self.serializer_stats["compression_savings"] += len(serialized_data) - len(compressed_data)
# 触发压缩应用回调
self._trigger_serializer_callback("compression_applied", {
"original_size": len(json_string.encode('utf-8')),
"compressed_size": len(compressed_data),
"compression_ratio": len(compressed_data) / len(json_string.encode('utf-8')),
"timestamp": time.time()
})
return serialized_data
except Exception as e:
print(f"✗ JSON序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _deserialize_json(self, data: bytes) -> Optional[Any]:
"""
JSON反序列化
Args:
data: 要反序列化的字节数据
Returns:
反序列化后的对象或None
"""
try:
# 尝试解压缩
try:
decompressed_data = zlib.decompress(data)
data = decompressed_data
except zlib.error:
# 不是压缩数据,使用原始数据
pass
# 解码为JSON
json_string = data.decode('utf-8')
deserialized_obj = json.loads(json_string)
return deserialized_obj
except Exception as e:
print(f"✗ JSON反序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _json_default(self, obj):
"""
JSON默认序列化方法
Args:
obj: 要序列化的对象
Returns:
可序列化的对象
"""
# 处理不能直接序列化的对象
return str(obj)
def _serialize_pickle(self, obj: Any) -> Optional[bytes]:
"""
Pickle序列化
Args:
obj: 要序列化的对象
Returns:
序列化后的字节数据或None
"""
try:
# 序列化为pickle数据
pickle_data = pickle.dumps(obj)
# 检查是否需要压缩
if (self.serializer_config["enable_compression"] and
len(pickle_data) > self.serializer_config["compression_threshold"]):
compressed_data = zlib.compress(pickle_data)
if len(compressed_data) < len(pickle_data):
# 使用压缩版本
pickle_data = compressed_data
self.serializer_state["compressed_objects"] += 1
self.serializer_stats["compression_savings"] += len(pickle_data) - len(compressed_data)
# 触发压缩应用回调
self._trigger_serializer_callback("compression_applied", {
"original_size": len(pickle.dumps(obj)),
"compressed_size": len(compressed_data),
"compression_ratio": len(compressed_data) / len(pickle.dumps(obj)),
"timestamp": time.time()
})
return pickle_data
except Exception as e:
print(f"✗ Pickle序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _deserialize_pickle(self, data: bytes) -> Optional[Any]:
"""
Pickle反序列化
Args:
data: 要反序列化的字节数据
Returns:
反序列化后的对象或None
"""
try:
# 尝试解压缩
try:
decompressed_data = zlib.decompress(data)
data = decompressed_data
except zlib.error:
# 不是压缩数据,使用原始数据
pass
# 反序列化pickle数据
deserialized_obj = pickle.loads(data)
return deserialized_obj
except Exception as e:
print(f"✗ Pickle反序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _serialize_binary(self, obj: Any) -> Optional[bytes]:
"""
二进制序列化
Args:
obj: 要序列化的对象
Returns:
序列化后的字节数据或None
"""
try:
# 简化实现将对象转换为JSON再编码为二进制
json_string = json.dumps(obj, ensure_ascii=False, default=self._json_default)
binary_data = json_string.encode('utf-8')
# 添加简单的头部信息
header = len(binary_data).to_bytes(4, byteorder='big')
serialized_data = header + binary_data
# 检查是否需要压缩
if (self.serializer_config["enable_compression"] and
len(serialized_data) > self.serializer_config["compression_threshold"]):
compressed_data = zlib.compress(serialized_data)
if len(compressed_data) < len(serialized_data):
# 使用压缩版本
serialized_data = compressed_data
self.serializer_state["compressed_objects"] += 1
self.serializer_stats["compression_savings"] += len(serialized_data) - len(compressed_data)
# 触发压缩应用回调
self._trigger_serializer_callback("compression_applied", {
"original_size": len(header + binary_data),
"compressed_size": len(compressed_data),
"compression_ratio": len(compressed_data) / len(header + binary_data),
"timestamp": time.time()
})
return serialized_data
except Exception as e:
print(f"✗ 二进制序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _deserialize_binary(self, data: bytes) -> Optional[Any]:
"""
二进制反序列化
Args:
data: 要反序列化的字节数据
Returns:
反序列化后的对象或None
"""
try:
# 尝试解压缩
try:
decompressed_data = zlib.decompress(data)
data = decompressed_data
except zlib.error:
# 不是压缩数据,使用原始数据
pass
# 解析头部信息
if len(data) < 4:
raise ValueError("数据太短")
message_length = int.from_bytes(data[:4], byteorder='big')
binary_data = data[4:4 + message_length]
# 解码为JSON
json_string = binary_data.decode('utf-8')
deserialized_obj = json.loads(json_string)
return deserialized_obj
except Exception as e:
print(f"✗ 二进制反序列化失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return None
def _generate_cache_key(self, obj: Any, format: str) -> str:
"""
生成缓存键
Args:
obj: 对象
format: 格式
Returns:
缓存键
"""
try:
# 简化实现:使用对象的字符串表示和格式生成键
obj_str = str(obj)
cache_key = f"{format}:{hash(obj_str)}:{len(obj_str)}"
return cache_key
except Exception as e:
print(f"✗ 缓存键生成失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return f"error_key_{int(time.time() * 1000000)}"
def add_serializer(self, format: str, serialize_func: callable, deserialize_func: callable) -> bool:
"""
添加序列化器
Args:
format: 格式名称
serialize_func: 序列化函数
deserialize_func: 反序列化函数
Returns:
是否添加成功
"""
try:
self.serializers[format] = {
"serialize": serialize_func,
"deserialize": deserialize_func
}
if format not in self.serializer_config["supported_formats"]:
self.serializer_config["supported_formats"].append(format)
print(f"✓ 序列化器已添加: {format}")
return True
except Exception as e:
print(f"✗ 序列化器添加失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return False
def remove_serializer(self, format: str) -> bool:
"""
移除序列化器
Args:
format: 格式名称
Returns:
是否移除成功
"""
try:
if format in self.serializers:
del self.serializers[format]
if format in self.serializer_config["supported_formats"]:
self.serializer_config["supported_formats"].remove(format)
print(f"✓ 序列化器已移除: {format}")
return True
else:
print(f"✗ 序列化器不存在: {format}")
return False
except Exception as e:
print(f"✗ 序列化器移除失败: {e}")
self.serializer_stats["serializer_errors"] += 1
return False
def get_serializer_stats(self) -> Dict[str, Any]:
"""
获取序列化统计信息
Returns:
序列化统计字典
"""
return {
"state": self.serializer_state.copy(),
"stats": self.serializer_stats.copy(),
"config": self.serializer_config.copy(),
"cache_size": len(self.serialization_cache)
}
def reset_stats(self):
"""重置序列化统计信息"""
try:
self.serializer_stats = {
"objects_serialized": 0,
"objects_deserialized": 0,
"bytes_serialized": 0,
"bytes_deserialized": 0,
"compression_savings": 0,
"cache_hits": 0,
"cache_misses": 0,
"serializer_errors": 0
}
print("✓ 序列化统计信息已重置")
except Exception as e:
print(f"✗ 序列化统计信息重置失败: {e}")
def set_serializer_config(self, config: Dict[str, Any]) -> bool:
"""
设置序列化配置
Args:
config: 序列化配置字典
Returns:
是否设置成功
"""
try:
self.serializer_config.update(config)
print(f"✓ 序列化配置已更新: {self.serializer_config}")
return True
except Exception as e:
print(f"✗ 序列化配置设置失败: {e}")
return False
def get_serializer_config(self) -> Dict[str, Any]:
"""
获取序列化配置
Returns:
序列化配置字典
"""
return self.serializer_config.copy()
def _trigger_serializer_callback(self, callback_type: str, data: Dict[str, Any]):
"""
触发序列化回调
Args:
callback_type: 回调类型
data: 回调数据
"""
try:
if callback_type in self.serializer_callbacks:
for callback in self.serializer_callbacks[callback_type]:
try:
callback(data)
except Exception as e:
print(f"✗ 序列化回调执行失败: {callback_type} - {e}")
except Exception as e:
print(f"✗ 序列化回调触发失败: {e}")
def register_serializer_callback(self, callback_type: str, callback: callable):
"""
注册序列化回调
Args:
callback_type: 回调类型
callback: 回调函数
"""
try:
if callback_type in self.serializer_callbacks:
self.serializer_callbacks[callback_type].append(callback)
print(f"✓ 序列化回调已注册: {callback_type}")
else:
print(f"✗ 无效的回调类型: {callback_type}")
except Exception as e:
print(f"✗ 序列化回调注册失败: {e}")
def unregister_serializer_callback(self, callback_type: str, callback: callable):
"""
注销序列化回调
Args:
callback_type: 回调类型
callback: 回调函数
"""
try:
if callback_type in self.serializer_callbacks:
if callback in self.serializer_callbacks[callback_type]:
self.serializer_callbacks[callback_type].remove(callback)
print(f"✓ 序列化回调已注销: {callback_type}")
else:
print(f"✗ 无效的回调类型: {callback_type}")
except Exception as e:
print(f"✗ 序列化回调注销失败: {e}")