完成--完成监控系统资源方法
This commit is contained in:
parent
b0609e1ad1
commit
cb75799ae2
@ -559,17 +559,85 @@ Response:
|
||||
"gpu": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "NVIDIA T4",
|
||||
"memory_used": 8542,
|
||||
"memory_total": 16384,
|
||||
"utilization": 75
|
||||
"name": "NVIDIA GeForce RTX 3090",
|
||||
"memory": {
|
||||
"total": 24576, // MB
|
||||
"used": 3678, // MB
|
||||
"free": 20898 // MB
|
||||
},
|
||||
"utilization": {
|
||||
"gpu": 45, // %
|
||||
"memory": 15 // %
|
||||
},
|
||||
"temperature": 65, // °C
|
||||
"power": {
|
||||
"draw": 180.5, // W
|
||||
"limit": 350.0 // W
|
||||
},
|
||||
"processes": [
|
||||
{
|
||||
"pid": 1234,
|
||||
"name": "python",
|
||||
"memory": 2048 // MB
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"cpu": {
|
||||
"usage_percent": 65,
|
||||
"memory_used": 32768,
|
||||
"memory_total": 65536
|
||||
"count": {
|
||||
"physical": 16,
|
||||
"logical": 32
|
||||
},
|
||||
"utilization": 35.5, // %
|
||||
"frequency": {
|
||||
"current": 3.6, // GHz
|
||||
"min": 2.5, // GHz
|
||||
"max": 4.2 // GHz
|
||||
},
|
||||
"temperature": 45.5, // °C
|
||||
"memory": {
|
||||
"total": 32768, // MB
|
||||
"used": 16384, // MB
|
||||
"free": 16384, // MB
|
||||
"percent": 50.0 // %
|
||||
},
|
||||
"swap": {
|
||||
"total": 8192, // MB
|
||||
"used": 1024, // MB
|
||||
"free": 7168, // MB
|
||||
"percent": 12.5 // %
|
||||
}
|
||||
},
|
||||
"disk": {
|
||||
"/": {
|
||||
"total": 512000, // MB
|
||||
"used": 256000, // MB
|
||||
"free": 256000, // MB
|
||||
"percent": 50.0 // %
|
||||
},
|
||||
"/home": {
|
||||
"total": 1024000, // MB
|
||||
"used": 512000, // MB
|
||||
"free": 512000, // MB
|
||||
"percent": 50.0 // %
|
||||
}
|
||||
},
|
||||
"processes": {
|
||||
"total": 256,
|
||||
"running": 2,
|
||||
"sleeping": 254
|
||||
}
|
||||
},
|
||||
"timestamp": "2025-02-19T15:30:45"
|
||||
}
|
||||
|
||||
Error Response:
|
||||
{
|
||||
"status": "error",
|
||||
"message": "获取资源信息失败",
|
||||
"details": {
|
||||
"error_type": "GPUQueryError",
|
||||
"error_message": "Failed to query GPU information"
|
||||
}
|
||||
}
|
||||
```
|
||||
@ -616,8 +684,6 @@ Response:
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 附录A:方法详细说明
|
||||
|
||||
### A1. 数据预处理方法
|
||||
|
||||
5
example_system_monitor.py
Normal file
5
example_system_monitor.py
Normal file
@ -0,0 +1,5 @@
|
||||
# Example: take one snapshot of the machine's resources and print it.
from function.system_monitor import SystemMonitor

# Constructing the monitor also runs its logging setup (see SystemMonitor.__init__).
system_monitor = SystemMonitor()

# get_system_resources() returns a dict; print its default repr to stdout.
resources_snapshot = system_monitor.get_system_resources()
print(resources_snapshot)
|
||||
BIN
function/__pycache__/system_monitor.cpython-39.pyc
Normal file
BIN
function/__pycache__/system_monitor.cpython-39.pyc
Normal file
Binary file not shown.
208
function/system_monitor.py
Normal file
208
function/system_monitor.py
Normal file
@ -0,0 +1,208 @@
|
||||
import psutil
|
||||
import GPUtil
|
||||
import platform
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
class SystemMonitor:
    """Collect point-in-time usage figures for GPUs, CPU, memory, swap, disks and processes.

    The public entry point is :meth:`get_system_resources`. Each ``_get_*``
    helper gathers one section of the report and, when its query fails, logs
    the error and falls back to an empty container so the remaining sections
    can still be reported.
    """

    # Divisor used to turn the byte counts reported by psutil into MB.
    _BYTES_PER_MB = 1024 * 1024
    # Name that marks the file handler installed by _setup_logging.
    _FILE_HANDLER_NAME = 'system_monitor_file'

    def __init__(self):
        """Create the monitor and make sure its file logger is configured."""
        self.logger = logging.getLogger(__name__)
        self._setup_logging()

    def _setup_logging(self):
        """Attach a timestamped log file under ``.log/`` to the module logger.

        The logger is looked up by module name and is therefore shared by
        every instance. The handler is installed only once; otherwise each
        new ``SystemMonitor()`` would open another log file and every message
        would be written once per instance created so far.
        """
        self.logger.setLevel(logging.INFO)

        already_installed = any(
            handler.get_name() == self._FILE_HANDLER_NAME
            for handler in self.logger.handlers
        )
        if already_installed:
            return

        log_dir = Path('.log')
        log_dir.mkdir(exist_ok=True)

        file_handler = logging.FileHandler(
            log_dir / f'system_monitor_{datetime.now():%Y%m%d_%H%M%S}.log',
            # Log messages contain non-ASCII text; do not depend on the
            # platform's default file encoding.
            encoding='utf-8',
        )
        file_handler.set_name(self._FILE_HANDLER_NAME)
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger.addHandler(file_handler)

    @classmethod
    def _usage_in_mb(cls, usage) -> Dict:
        """Convert an object with ``total``/``used``/``free``/``percent`` to an MB dict."""
        return {
            'total': usage.total // cls._BYTES_PER_MB,  # MB
            'used': usage.used // cls._BYTES_PER_MB,  # MB
            'free': usage.free // cls._BYTES_PER_MB,  # MB
            'percent': usage.percent  # %
        }

    def _get_gpu_info(self) -> List[Dict]:
        """Return one dict per GPU reported by GPUtil.

        Returns:
            A list of GPU descriptions (id, name, memory, utilization,
            temperature, power, processes), or ``[]`` if the query fails.
        """
        try:
            gpu_info = []
            for gpu in GPUtil.getGPUs():
                gpu_info.append({
                    'id': gpu.id,
                    'name': gpu.name,
                    'memory': {
                        'total': gpu.memoryTotal,  # MB
                        'used': gpu.memoryUsed,  # MB
                        'free': gpu.memoryFree  # MB
                    },
                    'utilization': {
                        'gpu': gpu.load * 100,  # %
                        'memory': gpu.memoryUtil * 100  # %
                    },
                    'temperature': gpu.temperature,  # °C
                    'power': {
                        # Reported as None when the GPU object does not
                        # carry these attributes.
                        'draw': getattr(gpu, 'powerDraw', None),  # W
                        'limit': getattr(gpu, 'powerLimit', None)  # W
                    },
                    # Per-process GPU usage is not collected yet; the key is
                    # kept so the response shape matches the documented API.
                    'processes': []
                })
            return gpu_info
        except Exception as e:
            self.logger.error("获取GPU信息失败: %s", e)
            return []

    def _get_cpu_temperature(self) -> Optional[float]:
        """Return the first ``coretemp`` reading in °C, or None when unavailable.

        ``psutil.sensors_temperatures`` only exists on some platforms and the
        ``coretemp`` entry may be missing or empty; all of these cases yield
        None instead of an exception, so the rest of the CPU section survives.
        """
        read_sensors = getattr(psutil, 'sensors_temperatures', None)
        if read_sensors is None:
            return None
        readings = read_sensors().get('coretemp')
        if not readings:
            return None
        return readings[0].current

    def _get_cpu_info(self) -> Dict:
        """Return CPU counts, utilization, frequency and temperature plus memory and swap.

        ``psutil.cpu_percent(interval=1)`` is used, so this call blocks for
        about one second while utilization is sampled.

        Returns:
            The CPU section of the report, or ``{}`` if the query fails.
        """
        try:
            cpu_freq = psutil.cpu_freq()
            cpu_temp = self._get_cpu_temperature()

            return {
                'count': {
                    'physical': psutil.cpu_count(logical=False),
                    'logical': psutil.cpu_count(logical=True)
                },
                'utilization': psutil.cpu_percent(interval=1),  # %
                'frequency': {
                    # The reported frequencies are divided by 1000 to give GHz.
                    'current': cpu_freq.current / 1000 if cpu_freq else None,  # GHz
                    'min': cpu_freq.min / 1000 if cpu_freq else None,  # GHz
                    'max': cpu_freq.max / 1000 if cpu_freq else None  # GHz
                },
                'temperature': cpu_temp,  # °C
                'memory': self._get_memory_info(),
                'swap': self._get_swap_info()
            }
        except Exception as e:
            self.logger.error("获取CPU信息失败: %s", e)
            return {}

    def _get_memory_info(self) -> Dict:
        """Return virtual-memory usage in MB, or ``{}`` if the query fails."""
        try:
            return self._usage_in_mb(psutil.virtual_memory())
        except Exception as e:
            self.logger.error("获取内存信息失败: %s", e)
            return {}

    def _get_swap_info(self) -> Dict:
        """Return swap usage in MB, or ``{}`` if the query fails."""
        try:
            return self._usage_in_mb(psutil.swap_memory())
        except Exception as e:
            self.logger.error("获取交换内存信息失败: %s", e)
            return {}

    def _get_disk_info(self) -> Dict:
        """Return usage in MB for every readable partition, keyed by mount point.

        Returns:
            ``{mountpoint: {total, used, free, percent}}``, or ``{}`` if the
            partition listing itself fails.
        """
        try:
            disk_info = {}
            for partition in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                except OSError:
                    # Covers PermissionError as well: skip mount points that
                    # cannot be inspected instead of failing the whole section.
                    continue
                disk_info[partition.mountpoint] = self._usage_in_mb(usage)
            return disk_info
        except Exception as e:
            self.logger.error("获取磁盘信息失败: %s", e)
            return {}

    def _get_process_info(self) -> Dict:
        """Return the total process count and how many are running or sleeping.

        Returns:
            ``{'total', 'running', 'sleeping'}``, or ``{}`` if the query fails.
        """
        try:
            processes = {
                'total': len(psutil.pids()),
                'running': 0,
                'sleeping': 0
            }

            for proc in psutil.process_iter(['status']):
                try:
                    status = proc.info['status']
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    # The process vanished or is not accessible; leave it
                    # out of the per-status counts.
                    continue
                # Only these two states are tallied; others count toward
                # 'total' only.
                if status in ('running', 'sleeping'):
                    processes[status] += 1

            return processes
        except Exception as e:
            self.logger.error("获取进程信息失败: %s", e)
            return {}

    def _collect_section(self, label, collector, fallback):
        """Run one section collector, returning ``fallback`` if it raises.

        Args:
            label: Section name used in the error log message.
            collector: Zero-argument callable producing the section data.
            fallback: Value reported for the section when ``collector`` fails.
        """
        try:
            return collector()
        except Exception as e:
            self.logger.error("获取%s信息失败: %s", label, e)
            return fallback

    def get_system_resources(self) -> Dict:
        """Return a snapshot of GPU, CPU, disk and process usage.

        Every section is collected independently: if one collector raises,
        that section is reported as empty (``[]`` for GPUs, ``{}`` otherwise)
        and the other sections are still returned.

        Returns:
            On success, ``{'status': 'success', 'resources': {...},
            'timestamp': 'YYYY-MM-DDTHH:MM:SS'}``. If assembling the response
            itself fails, ``{'status': 'error', 'message': ..., 'details':
            {'error_type', 'error_message'}}``.
        """
        try:
            resources = {
                'gpu': self._collect_section('GPU', self._get_gpu_info, []),
                'cpu': self._collect_section('CPU', self._get_cpu_info, {}),
                'disk': self._collect_section('磁盘', self._get_disk_info, {}),
                'processes': self._collect_section('进程', self._get_process_info, {})
            }

            self.logger.info("成功获取系统资源信息")

            return {
                'status': 'success',
                'resources': resources,
                'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
            }

        except Exception as e:
            self.logger.error("获取系统资源信息失败: %s", e)
            return {
                'status': 'error',
                'message': '获取资源信息失败',
                'details': {
                    'error_type': type(e).__name__,
                    'error_message': str(e)
                }
            }
|
||||
86
test_system_monitor.py
Normal file
86
test_system_monitor.py
Normal file
@ -0,0 +1,86 @@
|
||||
import pytest
|
||||
from function.system_monitor import SystemMonitor
|
||||
from typing import Dict
|
||||
|
||||
class TestSystemMonitor:
    """Tests for SystemMonitor.get_system_resources against the real machine."""

    @pytest.fixture
    def system_monitor(self):
        """Provide a fresh SystemMonitor for each test."""
        return SystemMonitor()

    def test_get_system_resources(self, system_monitor):
        """The snapshot has the documented top-level shape and sane values."""
        result = system_monitor.get_system_resources()

        # Top-level response format
        assert isinstance(result, dict)
        assert 'status' in result
        assert result['status'] == 'success'
        assert 'resources' in result
        assert 'timestamp' in result

        resources = result['resources']

        # GPU section (details are only checked when a GPU is present)
        assert 'gpu' in resources
        if resources['gpu']:
            gpu = resources['gpu'][0]
            assert 'id' in gpu
            assert 'name' in gpu
            assert 'memory' in gpu
            assert 'utilization' in gpu
            assert 'temperature' in gpu

        # CPU section
        assert 'cpu' in resources
        cpu = resources['cpu']
        assert 'count' in cpu
        assert 'utilization' in cpu
        assert 'memory' in cpu
        assert 'swap' in cpu

        # Memory section (nested under the CPU section)
        memory = cpu['memory']
        assert 'total' in memory
        assert 'used' in memory
        assert 'free' in memory
        assert 'percent' in memory
        assert memory['total'] > 0
        assert 0 <= memory['percent'] <= 100

        # Disk section
        assert 'disk' in resources
        assert len(resources['disk']) > 0
        for mount_point, disk_info in resources['disk'].items():
            assert 'total' in disk_info
            assert 'used' in disk_info
            assert 'free' in disk_info
            assert 'percent' in disk_info
            assert disk_info['total'] > 0
            assert 0 <= disk_info['percent'] <= 100

        # Process section
        assert 'processes' in resources
        processes = resources['processes']
        assert 'total' in processes
        assert 'running' in processes
        assert 'sleeping' in processes
        assert processes['total'] > 0
        assert processes['running'] >= 0
        assert processes['sleeping'] >= 0

    def test_error_handling(self, system_monitor, monkeypatch):
        """A failing GPU query degrades to an empty GPU list, not an error response."""
        # Imported here under an alias because the fixture argument is
        # already named ``system_monitor``.
        from function import system_monitor as monitor_module

        def mock_gpu_error(*args, **kwargs):
            raise Exception("GPU query failed")

        # Make the underlying GPUtil query fail instead of replacing
        # _get_gpu_info itself: the failure must pass through that method's
        # own error handling, which is what turns it into an empty list.
        monkeypatch.setattr(monitor_module.GPUtil, 'getGPUs', mock_gpu_error)

        result = system_monitor.get_system_resources()
        # Other sections must still be returned even though the GPU query failed.
        assert result['status'] == 'success'
        assert result['resources']['gpu'] == []

        # The remaining sections are still present.
        assert 'cpu' in result['resources']
        assert 'disk' in result['resources']
        assert 'processes' in result['resources']
|
||||
Loading…
Reference in New Issue
Block a user