208 lines
7.4 KiB
Python
208 lines
7.4 KiB
Python
import logging
import platform
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import GPUtil
import psutil
|
|
|
|
class SystemMonitor:
    """Collect a snapshot of GPU, CPU, memory, swap, disk and process usage.

    Every ``_get_*`` collector catches its own exceptions, logs them and
    returns an empty container, so one failing probe does not prevent the
    others from reporting.
    """

    # Divisor used to convert the byte counts reported by psutil to MB.
    _BYTES_PER_MB = 1024 * 1024

    # Attribute set on the file handler installed by this class so that later
    # instances can recognise it and avoid attaching a duplicate.
    _HANDLER_MARK = '_system_monitor_file_handler'

    def __init__(self):
        """Bind the module logger and make sure it writes to a log file."""
        self.logger = logging.getLogger(__name__)
        self._setup_logging()

    def _setup_logging(self):
        """Attach a timestamped file handler under ``.log/`` to the logger.

        ``logging.getLogger(__name__)`` returns the same logger object for
        every instance, so the handler is installed only once; adding one per
        instance would duplicate every record and leak open file handles.
        """
        self.logger.setLevel(logging.INFO)

        already_installed = any(
            getattr(handler, self._HANDLER_MARK, False)
            for handler in self.logger.handlers
        )
        if already_installed:
            return

        log_dir = Path('.log')
        log_dir.mkdir(exist_ok=True)

        # Messages are not ASCII, so do not rely on the platform default
        # encoding when opening the log file.
        file_handler = logging.FileHandler(
            log_dir / f'system_monitor_{datetime.now():%Y%m%d_%H%M%S}.log',
            encoding='utf-8',
        )
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        setattr(file_handler, self._HANDLER_MARK, True)
        self.logger.addHandler(file_handler)

    @staticmethod
    def _summarize_usage(usage) -> Dict[str, Any]:
        """Convert an object with total/used/free/percent into a dict in MB.

        Args:
            usage: Any object exposing ``total``, ``used`` and ``free`` as
                byte counts and ``percent`` as a percentage.

        Returns:
            ``{'total', 'used', 'free', 'percent'}`` with sizes floor-divided
            to MB and ``percent`` passed through unchanged.
        """
        per_mb = SystemMonitor._BYTES_PER_MB
        return {
            'total': usage.total // per_mb,  # MB
            'used': usage.used // per_mb,  # MB
            'free': usage.free // per_mb,  # MB
            'percent': usage.percent,  # %
        }

    @staticmethod
    def _first_coretemp(readings) -> Optional[float]:
        """Return the first ``coretemp`` reading's ``current`` value, if any.

        Args:
            readings: Mapping of sensor name to a list of readings, each
                exposing a ``current`` attribute.

        Returns:
            The first ``coretemp`` entry's ``current`` value, or ``None`` when
            the sensor is missing or reported no entries.
        """
        entries = readings.get('coretemp') or []
        return entries[0].current if entries else None

    def _get_gpu_info(self) -> List[Dict[str, Any]]:
        """Describe every GPU returned by ``GPUtil.getGPUs()``.

        Returns:
            One dict per GPU with id, name, memory, utilization, temperature
            and power figures; an empty list when the query fails.
        """
        try:
            return [
                {
                    'id': gpu.id,
                    'name': gpu.name,
                    'memory': {
                        'total': gpu.memoryTotal,  # MB
                        'used': gpu.memoryUsed,  # MB
                        'free': gpu.memoryFree,  # MB
                    },
                    'utilization': {
                        'gpu': gpu.load * 100,  # %
                        'memory': gpu.memoryUtil * 100,  # %
                    },
                    'temperature': gpu.temperature,  # °C
                    'power': {
                        # Not every GPU object carries power attributes.
                        'draw': getattr(gpu, 'powerDraw', None),  # W
                        'limit': getattr(gpu, 'powerLimit', None),  # W
                    },
                    # Per-process GPU usage is not collected; the key is kept
                    # so the result shape stays stable for callers.
                    'processes': [],
                }
                for gpu in GPUtil.getGPUs()
            ]
        except Exception as e:
            self.logger.error(f"获取GPU信息失败: {e}")
            return []

    def _get_cpu_info(self) -> Dict[str, Any]:
        """Collect CPU counts, utilization, frequency, temperature and memory.

        ``psutil.cpu_percent(interval=1)`` blocks for about one second while
        it samples utilization.

        Returns:
            A dict with ``count``, ``utilization``, ``frequency``,
            ``temperature``, ``memory`` and ``swap`` entries; an empty dict
            when collection fails.
        """
        try:
            cpu_freq = psutil.cpu_freq()

            # psutil only defines sensors_temperatures() on some platforms;
            # a missing sensor API must not discard the rest of the report.
            read_sensors = getattr(psutil, 'sensors_temperatures', None)
            readings = read_sensors() if read_sensors is not None else {}

            return {
                'count': {
                    'physical': psutil.cpu_count(logical=False),
                    'logical': psutil.cpu_count(logical=True),
                },
                'utilization': psutil.cpu_percent(interval=1),  # %
                'frequency': {
                    'current': cpu_freq.current / 1000 if cpu_freq else None,  # GHz
                    'min': cpu_freq.min / 1000 if cpu_freq else None,  # GHz
                    'max': cpu_freq.max / 1000 if cpu_freq else None,  # GHz
                },
                'temperature': self._first_coretemp(readings),  # °C
                'memory': self._get_memory_info(),
                'swap': self._get_swap_info(),
            }
        except Exception as e:
            self.logger.error(f"获取CPU信息失败: {e}")
            return {}

    def _get_memory_info(self) -> Dict[str, Any]:
        """Return virtual memory usage in MB, or an empty dict on failure."""
        try:
            return self._summarize_usage(psutil.virtual_memory())
        except Exception as e:
            self.logger.error(f"获取内存信息失败: {e}")
            return {}

    def _get_swap_info(self) -> Dict[str, Any]:
        """Return swap usage in MB, or an empty dict on failure."""
        try:
            return self._summarize_usage(psutil.swap_memory())
        except Exception as e:
            self.logger.error(f"获取交换内存信息失败: {e}")
            return {}

    def _get_disk_info(self) -> Dict[str, Any]:
        """Return usage per mount point for every readable partition.

        Partitions whose usage cannot be read are skipped silently.

        Returns:
            Mapping of mount point to a usage dict in MB; an empty dict when
            the partition list itself cannot be obtained.
        """
        try:
            disk_info = {}
            for partition in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                except OSError:
                    # PermissionError is an OSError subclass; unreadable
                    # mount points are skipped deliberately.
                    continue
                disk_info[partition.mountpoint] = self._summarize_usage(usage)
            return disk_info
        except Exception as e:
            self.logger.error(f"获取磁盘信息失败: {e}")
            return {}

    def _get_process_info(self) -> Dict[str, int]:
        """Count all processes and how many are running or sleeping.

        Returns:
            ``{'total', 'running', 'sleeping'}``; an empty dict on failure.
        """
        try:
            counts = {
                'total': len(psutil.pids()),
                'running': 0,
                'sleeping': 0,
            }

            for proc in psutil.process_iter(['status']):
                try:
                    status = proc.info['status']
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue
                # Other states are only reflected in 'total'.
                if status in ('running', 'sleeping'):
                    counts[status] += 1

            return counts
        except Exception as e:
            self.logger.error(f"获取进程信息失败: {e}")
            return {}

    def get_system_resources(self) -> Dict[str, Any]:
        """Gather all resource reports into a single response dict.

        Returns:
            On success ``{'status': 'success', 'resources': {...},
            'timestamp': 'YYYY-MM-DDTHH:MM:SS'}`` where ``resources`` has the
            keys ``gpu``, ``cpu``, ``disk`` and ``processes``. If an
            exception escapes the collectors, ``{'status': 'error',
            'message': ..., 'details': {...}}`` with the exception type name
            and message.
        """
        try:
            resources = {
                'gpu': self._get_gpu_info(),
                'cpu': self._get_cpu_info(),
                'disk': self._get_disk_info(),
                'processes': self._get_process_info(),
            }

            self.logger.info("成功获取系统资源信息")

            return {
                'status': 'success',
                'resources': resources,
                'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
            }

        except Exception as e:
            error_msg = f"获取系统资源信息失败: {str(e)}"
            self.logger.error(error_msg)
            return {
                'status': 'error',
                'message': '获取资源信息失败',
                'details': {
                    'error_type': type(e).__name__,
                    'error_message': str(e),
                },
            }