EG/plugins/user/network_sync/serialization/data_serializer.py
2025-12-12 16:16:15 +08:00

837 lines
29 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
数据序列化模块
负责数据的序列化和反序列化
"""
import json
import pickle
import struct
from typing import Dict, Any, List, Optional, Union
import zlib
import base64
class DataSerializer:
    """Serialize and deserialize data for the network sync plugin.

    Features visible in this class: three wire formats (``json``, ``pickle``
    and a simple length-prefixed ``binary`` layout), optional zlib
    compression, optional Base64 wrapping, a bounded result cache, usage
    statistics, delta encoding of dicts and event callbacks.

    Binary layout (all integers big-endian):
        dict:      u32 marker 1, u32 count, then for each entry
                   u32 key length, UTF-8 key, u32 value length, value bytes
        list:      u32 marker 2, u32 count, then for each item
                   u32 item length, item bytes
        primitive: u32 marker 3, u8 type code, payload
                   (0 = bool as u8, 1 = int as i64, 2 = float as f64,
                   3 = str as u32 length + UTF-8 bytes)
    Values that are not encodable as primitives are embedded as pickle blobs.

    SECURITY NOTE(review): the ``pickle`` format, and the pickle fallback of
    the ``binary`` format, call ``pickle.loads`` on received bytes.
    Unpickling can execute arbitrary code, so these formats must only be used
    with trusted peers. Confirm how callers choose the format.
    """

    # Container / primitive markers of the binary format.
    _TYPE_DICT = 1
    _TYPE_LIST = 2
    _TYPE_PRIMITIVE = 3

    # Type codes that follow the primitive marker.
    _CODE_BOOL = 0
    _CODE_INT = 1
    _CODE_FLOAT = 2
    _CODE_STR = 3

    # Range representable by struct's signed 64-bit 'q' format.
    _INT64_MIN = -(2 ** 63)
    _INT64_MAX = 2 ** 63 - 1

    # Pre-packed primitive marker, used to recognise primitive value blobs.
    _PRIMITIVE_MARKER = struct.pack('!I', 3)

    def __init__(self, plugin):
        """Create the serializer in a disabled, uninitialized state.

        Args:
            plugin: The network sync plugin instance that owns this object.
        """
        self.plugin = plugin
        self.enabled = False
        self.initialized = False

        # Serialization settings.
        self.serialization_config = {
            'default_format': 'json',  # json, pickle, binary
            'enable_compression': True,
            'compression_level': 6,
            'enable_base64_encoding': False,  # for text-only transports
            'enable_delta_encoding': True,
            'max_serialization_depth': 10,
            'enable_type_preservation': True
        }

        # Format name -> encoder / decoder dispatch tables.
        self.supported_formats = {
            'json': self._serialize_json,
            'pickle': self._serialize_pickle,
            'binary': self._serialize_binary
        }
        self.deserialization_methods = {
            'json': self._deserialize_json,
            'pickle': self._deserialize_pickle,
            'binary': self._deserialize_binary
        }

        # Usage counters.
        self.serialization_stats = self._fresh_stats()

        # Result cache: key -> {'data', 'timestamp', 'format'}.
        self.serialization_cache = {}
        self.cache_max_size = 1000
        self.cache_cleanup_threshold = 0.8

        # Event name -> list of registered callbacks.
        self.serialization_callbacks = {
            'data_serialized': [],
            'data_deserialized': [],
            'serialization_error': [],
            'deserialization_error': []
        }

        # Wall-clock time of the most recent operations.
        self.last_serialization = 0.0
        self.last_deserialization = 0.0
        print("✓ 数据序列化器已创建")

    @staticmethod
    def _fresh_stats() -> Dict[str, int]:
        """Return a new statistics dict with every counter at zero."""
        return {
            'objects_serialized': 0,
            'objects_deserialized': 0,
            'bytes_serialized': 0,
            'bytes_deserialized': 0,
            'compression_savings': 0,
            'serialization_errors': 0,
            'deserialization_errors': 0
        }

    def initialize(self) -> bool:
        """Mark the serializer as initialized.

        Returns:
            True on success, False if an unexpected error occurred.
        """
        try:
            self.initialized = True
            print("✓ 数据序列化器初始化完成")
            return True
        except Exception as e:
            print(f"✗ 数据序列化器初始化失败: {e}")
            import traceback
            traceback.print_exc()
            return False

    def enable(self) -> bool:
        """Enable the serializer; requires a prior ``initialize()``.

        Returns:
            True if the serializer is now enabled, False otherwise.
        """
        try:
            if not self.initialized:
                print("✗ 数据序列化器未初始化")
                return False
            self.enabled = True
            print("✓ 数据序列化器已启用")
            return True
        except Exception as e:
            print(f"✗ 数据序列化器启用失败: {e}")
            import traceback
            traceback.print_exc()
            return False

    def disable(self):
        """Disable the serializer; serialize/deserialize then return None."""
        try:
            self.enabled = False
            print("✓ 数据序列化器已禁用")
        except Exception as e:
            print(f"✗ 数据序列化器禁用失败: {e}")
            import traceback
            traceback.print_exc()

    def finalize(self):
        """Disable the serializer and release the cache and callbacks."""
        try:
            self.disable()
            self.serialization_cache.clear()
            # Empty each callback list but keep the event names, so callbacks
            # can be registered again after a later initialize().
            for callbacks in self.serialization_callbacks.values():
                callbacks.clear()
            self.initialized = False
            print("✓ 数据序列化器资源已清理")
        except Exception as e:
            print(f"✗ 数据序列化器资源清理失败: {e}")
            import traceback
            traceback.print_exc()

    def update(self, dt: float):
        """Periodic tick: trim the cache once it passes the cleanup threshold.

        Args:
            dt: Time elapsed since the previous update (not used here).
        """
        try:
            if not self.enabled:
                return
            limit = self.cache_max_size * self.cache_cleanup_threshold
            if len(self.serialization_cache) > limit:
                self._cleanup_cache()
        except Exception as e:
            print(f"✗ 数据序列化器更新失败: {e}")
            import traceback
            traceback.print_exc()

    def _cleanup_cache(self):
        """Evict the least recently used half of the cache entries."""
        try:
            oldest_first = sorted(
                self.serialization_cache,
                key=lambda k: self.serialization_cache[k]['timestamp']
            )
            for key in oldest_first[:len(oldest_first) // 2]:
                del self.serialization_cache[key]
        except Exception as e:
            print(f"✗ 缓存清理失败: {e}")

    def serialize(self, data: Any, format: Optional[str] = None,
                  compress: Optional[bool] = None) -> Optional[bytes]:
        """Serialize ``data`` to bytes.

        Args:
            data: The object to serialize.
            format: 'json', 'pickle' or 'binary'; None uses the configured
                default format.
            compress: Whether to zlib-compress the payload; None uses the
                configured default.

        Returns:
            The serialized (optionally compressed / Base64-encoded) bytes, or
            None if the serializer is disabled, the format is unknown or
            serialization failed.
        """
        try:
            if not self.enabled:
                print("✗ 数据序列化器未启用")
                return None

            if format is None:
                format = self.serialization_config['default_format']

            if format not in self.supported_formats:
                print(f"✗ 不支持的序列化格式: {format}")
                self.serialization_stats['serialization_errors'] += 1
                return None

            # Resolve every setting that influences the output *before*
            # building the cache key, so a configuration change can never
            # return bytes produced under the previous settings.
            if compress is None:
                compress = self.serialization_config['enable_compression']
            level = self.serialization_config['compression_level']
            use_base64 = self.serialization_config['enable_base64_encoding']
            base_key = self._generate_cache_key(data, format, bool(compress))
            cache_key = f"{base_key}_{level}_{bool(use_base64)}"

            cached = self.serialization_cache.get(cache_key)
            if cached is not None:
                now = time.time()
                cached['timestamp'] = now  # refresh recency for eviction
                self.serialization_stats['objects_serialized'] += 1
                self.last_serialization = now
                self._trigger_serialization_callback('data_serialized', {
                    'data': data,
                    'format': format,
                    'cached': True,
                    'size': len(cached['data'])
                })
                return cached['data']

            serialized_data = self.supported_formats[format](data)
            if serialized_data is None:
                self.serialization_stats['serialization_errors'] += 1
                return None

            if compress:
                original_size = len(serialized_data)
                serialized_data = zlib.compress(serialized_data, level)
                self.serialization_stats['compression_savings'] += (
                    original_size - len(serialized_data)
                )

            if use_base64:
                serialized_data = base64.b64encode(serialized_data)

            self.serialization_cache[cache_key] = {
                'data': serialized_data,
                'timestamp': time.time(),
                'format': format
            }
            if len(self.serialization_cache) > self.cache_max_size:
                self._cleanup_cache()

            self.serialization_stats['objects_serialized'] += 1
            self.serialization_stats['bytes_serialized'] += len(serialized_data)
            self.last_serialization = time.time()
            self._trigger_serialization_callback('data_serialized', {
                'data': data,
                'format': format,
                'cached': False,
                'size': len(serialized_data)
            })
            return serialized_data
        except Exception as e:
            print(f"✗ 数据序列化失败: {e}")
            self.serialization_stats['serialization_errors'] += 1
            self._trigger_serialization_callback('serialization_error', {
                'data': data,
                'error': str(e)
            })
            return None

    def deserialize(self, data: bytes, format: Optional[str] = None) -> Optional[Any]:
        """Deserialize bytes produced by ``serialize``.

        Args:
            data: The serialized bytes.
            format: 'json', 'pickle' or 'binary'; None uses the configured
                default format.

        Returns:
            The decoded object, or None if the serializer is disabled, the
            format is unknown or decoding failed. A payload whose decoded
            value is itself None is therefore reported as a failure.
        """
        try:
            if not self.enabled:
                print("✗ 数据序列化器未启用")
                return None

            if self.serialization_config['enable_base64_encoding']:
                data = base64.b64decode(data)

            if format is None:
                format = self.serialization_config['default_format']

            if format not in self.deserialization_methods:
                print(f"✗ 不支持的反序列化格式: {format}")
                self.serialization_stats['deserialization_errors'] += 1
                return None

            # There is no compression flag on the wire, so decompression is
            # attempted opportunistically; a zlib error means the payload was
            # sent uncompressed and is used as-is.
            try:
                data = zlib.decompress(data)
            except zlib.error:
                pass

            deserialized_data = self.deserialization_methods[format](data)
            if deserialized_data is None:
                self.serialization_stats['deserialization_errors'] += 1
                return None

            self.serialization_stats['objects_deserialized'] += 1
            self.serialization_stats['bytes_deserialized'] += len(data)
            self.last_deserialization = time.time()
            self._trigger_serialization_callback('data_deserialized', {
                'format': format,
                'size': len(data)
            })
            return deserialized_data
        except Exception as e:
            print(f"✗ 数据反序列化失败: {e}")
            self.serialization_stats['deserialization_errors'] += 1
            self._trigger_serialization_callback('deserialization_error', {
                'data': data,
                'error': str(e)
            })
            return None

    def _serialize_json(self, data: Any) -> Optional[bytes]:
        """Encode ``data`` as compact UTF-8 JSON; return None on failure."""
        try:
            json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
            return json_str.encode('utf-8')
        except Exception as e:
            print(f"✗ JSON序列化失败: {e}")
            return None

    def _deserialize_json(self, data: bytes) -> Optional[Any]:
        """Decode UTF-8 JSON bytes; return None on failure."""
        try:
            return json.loads(data.decode('utf-8'))
        except Exception as e:
            print(f"✗ JSON反序列化失败: {e}")
            return None

    def _serialize_pickle(self, data: Any) -> Optional[bytes]:
        """Pickle ``data``; return None on failure."""
        try:
            return pickle.dumps(data)
        except Exception as e:
            print(f"✗ Pickle序列化失败: {e}")
            return None

    def _deserialize_pickle(self, data: bytes) -> Optional[Any]:
        """Unpickle ``data``; return None on failure.

        SECURITY: pickle.loads can execute arbitrary code. Only use this
        format for bytes that come from a trusted source.
        """
        try:
            return pickle.loads(data)
        except Exception as e:
            print(f"✗ Pickle反序列化失败: {e}")
            return None

    def _serialize_binary(self, data: Any) -> Optional[bytes]:
        """Encode ``data`` in the binary format; return None on failure.

        Dicts and lists use the length-prefixed container layout, primitives
        the primitive layout, and anything else falls back to pickle.
        """
        try:
            if isinstance(data, dict):
                return self._serialize_dict_binary(data)
            if isinstance(data, list):
                return self._serialize_list_binary(data)
            return self._serialize_binary_value(data)
        except Exception as e:
            print(f"✗ 二进制序列化失败: {e}")
            return None

    def _deserialize_binary(self, data: bytes) -> Optional[Any]:
        """Decode binary-format bytes; return None on failure.

        The leading u32 marker selects dict / list / primitive decoding; any
        other value is treated as a pickle blob (see the security note on
        ``_deserialize_pickle``).
        """
        try:
            if len(data) < 4:
                return None
            type_id = struct.unpack('!I', data[:4])[0]
            if type_id == self._TYPE_DICT:
                return self._deserialize_dict_binary(data)
            if type_id == self._TYPE_LIST:
                return self._deserialize_list_binary(data)
            if type_id == self._TYPE_PRIMITIVE:
                return self._deserialize_primitive_binary(data)
            return pickle.loads(data)
        except Exception as e:
            print(f"✗ 二进制反序列化失败: {e}")
            return None

    def _is_binary_primitive(self, value: Any) -> bool:
        """Return True if ``value`` fits the primitive binary layout."""
        if isinstance(value, bool):
            return True
        if isinstance(value, int):
            # struct's 'q' only holds signed 64-bit values.
            return self._INT64_MIN <= value <= self._INT64_MAX
        return isinstance(value, (float, str))

    def _serialize_binary_value(self, value: Any) -> bytes:
        """Encode one value: primitive layout if possible, else pickle."""
        if self._is_binary_primitive(value):
            return self._serialize_primitive_binary(value)
        return pickle.dumps(value)

    def _deserialize_binary_value(self, raw: bytes) -> Any:
        """Decode one value written by ``_serialize_binary_value``.

        The blob is a primitive only if it starts with the primitive marker;
        guessing by trial decoding would misread pickle blobs as primitives.
        """
        if len(raw) >= 5 and raw[:4] == self._PRIMITIVE_MARKER:
            return self._deserialize_primitive_binary(raw)
        return pickle.loads(raw)

    @staticmethod
    def _read_chunk(data: bytes, offset: int):
        """Read a u32 length prefix plus that many bytes at ``offset``.

        Returns:
            A ``(chunk, next_offset)`` tuple.

        Raises:
            struct.error: If the length prefix itself is truncated.
            ValueError: If fewer bytes remain than the prefix announces.
        """
        (length,) = struct.unpack_from('!I', data, offset)
        start = offset + 4
        end = start + length
        if end > len(data):
            raise ValueError("Truncated binary payload")
        return data[start:end], end

    def _serialize_dict_binary(self, data: Dict[str, Any]) -> bytes:
        """Encode a dict in the binary container layout.

        Keys are written as ``str(key)``, so non-string keys come back as
        strings after a round trip.
        """
        try:
            parts = [struct.pack('!II', self._TYPE_DICT, len(data))]
            for key, value in data.items():
                key_bytes = str(key).encode('utf-8')
                value_bytes = self._serialize_binary_value(value)
                parts.append(struct.pack('!I', len(key_bytes)))
                parts.append(key_bytes)
                parts.append(struct.pack('!I', len(value_bytes)))
                parts.append(value_bytes)
            return b"".join(parts)
        except Exception as e:
            print(f"✗ 字典二进制序列化失败: {e}")
            raise

    def _deserialize_dict_binary(self, data: bytes) -> Dict[str, Any]:
        """Decode a dict written by ``_serialize_dict_binary``."""
        try:
            offset = 4  # skip the container marker
            (count,) = struct.unpack_from('!I', data, offset)
            offset += 4
            result = {}
            for _ in range(count):
                key_bytes, offset = self._read_chunk(data, offset)
                value_bytes, offset = self._read_chunk(data, offset)
                result[key_bytes.decode('utf-8')] = (
                    self._deserialize_binary_value(value_bytes)
                )
            return result
        except Exception as e:
            print(f"✗ 字典二进制反序列化失败: {e}")
            raise

    def _serialize_list_binary(self, data: List[Any]) -> bytes:
        """Encode a list in the binary container layout."""
        try:
            parts = [struct.pack('!II', self._TYPE_LIST, len(data))]
            for item in data:
                item_bytes = self._serialize_binary_value(item)
                parts.append(struct.pack('!I', len(item_bytes)))
                parts.append(item_bytes)
            return b"".join(parts)
        except Exception as e:
            print(f"✗ 列表二进制序列化失败: {e}")
            raise

    def _deserialize_list_binary(self, data: bytes) -> List[Any]:
        """Decode a list written by ``_serialize_list_binary``."""
        try:
            offset = 4  # skip the container marker
            (count,) = struct.unpack_from('!I', data, offset)
            offset += 4
            result = []
            for _ in range(count):
                item_bytes, offset = self._read_chunk(data, offset)
                result.append(self._deserialize_binary_value(item_bytes))
            return result
        except Exception as e:
            print(f"✗ 列表二进制反序列化失败: {e}")
            raise

    def _serialize_primitive_binary(self, data: Union[int, float, str, bool]) -> bytes:
        """Encode a bool, int, float or str in the primitive layout.

        Raises:
            ValueError: If ``data`` is not one of the supported types.
            struct.error: If an int does not fit in a signed 64-bit value.
        """
        try:
            marker = struct.pack('!I', self._TYPE_PRIMITIVE)
            # bool must be tested before int because bool subclasses int.
            if isinstance(data, bool):
                return marker + struct.pack('!BB', self._CODE_BOOL, 1 if data else 0)
            if isinstance(data, int):
                return marker + struct.pack('!Bq', self._CODE_INT, data)
            if isinstance(data, float):
                return marker + struct.pack('!Bd', self._CODE_FLOAT, data)
            if isinstance(data, str):
                text = data.encode('utf-8')
                return marker + struct.pack('!BI', self._CODE_STR, len(text)) + text
            raise ValueError(f"Unsupported primitive type: {type(data)}")
        except Exception as e:
            print(f"✗ 原始类型二进制序列化失败: {e}")
            raise

    def _deserialize_primitive_binary(self, data: bytes) -> Union[int, float, str, bool]:
        """Decode a value written by ``_serialize_primitive_binary``.

        Raises:
            ValueError: On an unknown type code or a truncated string.
            struct.error: If the buffer is too short for the encoded value.
        """
        try:
            offset = 4  # skip the primitive marker
            (type_code,) = struct.unpack_from('!B', data, offset)
            offset += 1
            if type_code == self._CODE_BOOL:
                return struct.unpack_from('!B', data, offset)[0] != 0
            if type_code == self._CODE_INT:
                return struct.unpack_from('!q', data, offset)[0]
            if type_code == self._CODE_FLOAT:
                return struct.unpack_from('!d', data, offset)[0]
            if type_code == self._CODE_STR:
                text, _ = self._read_chunk(data, offset)
                return text.decode('utf-8')
            raise ValueError(f"Unknown primitive type code: {type_code}")
        except Exception as e:
            print(f"✗ 原始类型二进制反序列化失败: {e}")
            raise

    def _generate_cache_key(self, data: Any, format: str, compress: bool) -> str:
        """Build the cache key for a (data, format, compress) combination.

        The digest covers the value's type name and ``repr`` rather than
        ``str``, so values such as ``1`` and ``"1"`` get distinct keys.

        Returns:
            The cache key; if hashing fails, ``str(id(data))`` is returned
            as a best-effort fallback.
        """
        try:
            import hashlib
            value_type = type(data)
            fingerprint = f"{value_type.__module__}.{value_type.__qualname__}:{data!r}"
            data_hash = hashlib.md5(fingerprint.encode('utf-8')).hexdigest()
            return f"{data_hash}_{format}_{compress}"
        except Exception as e:
            print(f"✗ 缓存键生成失败: {e}")
            return str(id(data))

    def delta_encode(self, current_data: Dict[str, Any],
                     previous_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return only the entries of ``current_data`` that are new or changed.

        Keys that exist only in ``previous_data`` are not represented in the
        result, so removals cannot be reconstructed by ``delta_decode``.

        Args:
            current_data: The current state.
            previous_data: The state the delta is computed against.

        Returns:
            The delta dict, or ``current_data`` unchanged when delta encoding
            is disabled or an error occurs.
        """
        try:
            if not self.serialization_config['enable_delta_encoding']:
                return current_data
            return {
                key: value
                for key, value in current_data.items()
                if key not in previous_data or previous_data[key] != value
            }
        except Exception as e:
            print(f"✗ 增量编码失败: {e}")
            return current_data

    def delta_decode(self, delta_data: Dict[str, Any],
                     previous_data: Dict[str, Any]) -> Dict[str, Any]:
        """Apply a delta on top of ``previous_data``.

        Args:
            delta_data: Entries produced by ``delta_encode``.
            previous_data: The state the delta was computed against; it is
                copied, not modified.

        Returns:
            The merged dict, or ``delta_data`` unchanged when delta encoding
            is disabled or an error occurs.
        """
        try:
            if not self.serialization_config['enable_delta_encoding']:
                return delta_data
            full_data = previous_data.copy()
            full_data.update(delta_data)
            return full_data
        except Exception as e:
            print(f"✗ 增量解码失败: {e}")
            return delta_data

    def get_serialization_stats(self) -> Dict[str, Any]:
        """Return a copy of the usage counters."""
        return self.serialization_stats.copy()

    def reset_serialization_stats(self):
        """Reset every usage counter to zero."""
        try:
            self.serialization_stats = self._fresh_stats()
            print("✓ 序列化统计信息已重置")
        except Exception as e:
            print(f"✗ 序列化统计信息重置失败: {e}")

    def set_serialization_config(self, config: Dict[str, Any]) -> bool:
        """Merge ``config`` into the current settings.

        Args:
            config: Settings to override; keys not present are left as-is.

        Returns:
            True if the settings were updated, False on error.
        """
        try:
            self.serialization_config.update(config)
            print(f"✓ 序列化配置已更新: {self.serialization_config}")
            return True
        except Exception as e:
            print(f"✗ 序列化配置设置失败: {e}")
            return False

    def get_serialization_config(self) -> Dict[str, Any]:
        """Return a copy of the current settings."""
        return self.serialization_config.copy()

    def clear_cache(self):
        """Drop every cached serialization result."""
        try:
            self.serialization_cache.clear()
            print("✓ 序列化缓存已清空")
        except Exception as e:
            print(f"✗ 序列化缓存清空失败: {e}")

    def _trigger_serialization_callback(self, callback_type: str, data: Dict[str, Any]):
        """Invoke every callback registered for ``callback_type``.

        A failing callback is reported and does not stop the remaining ones.

        Args:
            callback_type: Event name; unknown names are ignored.
            data: Payload passed to each callback.
        """
        try:
            for callback in self.serialization_callbacks.get(callback_type, ()):
                try:
                    callback(data)
                except Exception as e:
                    print(f"✗ 序列化回调执行失败: {e}")
        except Exception as e:
            print(f"✗ 序列化回调触发失败: {e}")

    def register_serialization_callback(self, callback_type: str, callback: callable):
        """Register ``callback`` for one of the known event names.

        Args:
            callback_type: 'data_serialized', 'data_deserialized',
                'serialization_error' or 'deserialization_error'.
            callback: Callable invoked with a single dict argument.
        """
        try:
            if callback_type in self.serialization_callbacks:
                self.serialization_callbacks[callback_type].append(callback)
                print(f"✓ 序列化回调已注册: {callback_type}")
            else:
                print(f"✗ 无效的回调类型: {callback_type}")
        except Exception as e:
            print(f"✗ 序列化回调注册失败: {e}")

    def unregister_serialization_callback(self, callback_type: str, callback: callable):
        """Remove a previously registered callback; unknown ones are ignored.

        Args:
            callback_type: Event name the callback was registered under.
            callback: The callable to remove.
        """
        try:
            registered = self.serialization_callbacks.get(callback_type)
            if registered is not None and callback in registered:
                registered.remove(callback)
                print(f"✓ 序列化回调已注销: {callback_type}")
        except Exception as e:
            print(f"✗ 序列化回调注销失败: {e}")
# Import the time module (DataSerializer calls time.time() for its cache and activity timestamps)
import time