import logging
import platform
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import GPUtil
import psutil


class SystemMonitor:
    """Collect GPU, CPU, memory, swap, disk and process statistics.

    Every ``_get_*`` collector is best-effort: on failure it logs the error
    and returns an empty container instead of raising, so one unavailable
    subsystem does not prevent the others from being reported.
    """

    # File-name prefix of the log files this class creates inside ``.log/``.
    _LOG_PREFIX = 'system_monitor_'

    def __init__(self):
        """Bind the module-level logger and make sure it logs to a file."""
        self.logger = logging.getLogger(__name__)
        self._setup_logging()

    def _setup_logging(self):
        """Attach a timestamped file handler under ``.log/`` to the logger.

        The logger is looked up by module name and is therefore shared by
        every ``SystemMonitor`` instance. A handler is only attached when the
        logger does not already have one created by this class; otherwise
        each new instance would duplicate every log record and keep one more
        file open.
        """
        log_dir = Path('.log')
        log_dir.mkdir(exist_ok=True)
        self.logger.setLevel(logging.INFO)

        if self._has_own_file_handler(log_dir):
            return

        # The log messages are non-ASCII, so do not depend on the locale's
        # default encoding being able to represent them.
        file_handler = logging.FileHandler(
            log_dir / f'{self._LOG_PREFIX}{datetime.now():%Y%m%d_%H%M%S}.log',
            encoding='utf-8',
        )
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger.addHandler(file_handler)

    def _has_own_file_handler(self, log_dir: Path) -> bool:
        """Return True if the logger already writes a monitor log in *log_dir*."""
        target_dir = log_dir.absolute()
        for handler in self.logger.handlers:
            if not isinstance(handler, logging.FileHandler):
                continue
            log_file = Path(handler.baseFilename)
            if (log_file.parent == target_dir
                    and log_file.name.startswith(self._LOG_PREFIX)):
                return True
        return False

    @staticmethod
    def _usage_in_mb(usage) -> Dict:
        """Convert a psutil usage tuple (byte counts) to a dict in whole MB.

        Args:
            usage: Any object exposing ``total``, ``used``, ``free`` and
                ``percent``, as returned by ``psutil.virtual_memory()``,
                ``psutil.swap_memory()`` and ``psutil.disk_usage()``.

        Returns:
            ``{'total', 'used', 'free'}`` in MB (floor division) plus the
            unchanged ``'percent'`` value.
        """
        bytes_per_mb = 1024 * 1024
        return {
            'total': usage.total // bytes_per_mb,  # MB
            'used': usage.used // bytes_per_mb,    # MB
            'free': usage.free // bytes_per_mb,    # MB
            'percent': usage.percent,              # %
        }

    def _get_gpu_info(self) -> List[Dict]:
        """Return one dict per GPU reported by GPUtil.

        Returns:
            A list of dicts with ``id``, ``name``, ``memory`` (MB),
            ``utilization`` (%), ``temperature`` (°C), ``power`` (W, ``None``
            when the GPU object has no such attribute) and ``processes``.
            An empty list is returned when the query fails.
        """
        try:
            gpu_info = []
            for gpu in GPUtil.getGPUs():
                gpu_info.append({
                    'id': gpu.id,
                    'name': gpu.name,
                    'memory': {
                        'total': gpu.memoryTotal,  # MB
                        'used': gpu.memoryUsed,    # MB
                        'free': gpu.memoryFree,    # MB
                    },
                    'utilization': {
                        'gpu': gpu.load * 100,           # %
                        'memory': gpu.memoryUtil * 100,  # %
                    },
                    'temperature': gpu.temperature,  # °C
                    'power': {
                        'draw': getattr(gpu, 'powerDraw', None),    # W
                        'limit': getattr(gpu, 'powerLimit', None),  # W
                    },
                    # Per-process GPU usage is not collected; the key is kept
                    # (always empty) so the payload shape stays stable.
                    'processes': [],
                })
            return gpu_info
        except Exception as e:
            self.logger.error("获取GPU信息失败: %s", e)
            return []

    def _read_cpu_temperature(self) -> Optional[float]:
        """Return the first ``coretemp`` sensor reading in °C, if available.

        ``psutil.sensors_temperatures`` is only defined on some platforms and
        may report no ``coretemp`` entries at all; both cases yield ``None``
        instead of an exception, so a missing sensor does not discard the
        rest of the CPU report.
        """
        read_sensors = getattr(psutil, 'sensors_temperatures', None)
        if read_sensors is None:
            return None
        readings = read_sensors().get('coretemp')
        if not readings:
            return None
        return readings[0].current

    def _get_cpu_info(self) -> Dict:
        """Return CPU counts, load, frequency and temperature.

        Memory and swap statistics are nested under the ``'memory'`` and
        ``'swap'`` keys of the result.

        Note:
            ``psutil.cpu_percent(interval=1)`` blocks for one second while it
            samples CPU utilisation.

        Returns:
            The CPU information dict, or ``{}`` when collection fails.
        """
        try:
            cpu_freq = psutil.cpu_freq()
            cpu_temp = self._read_cpu_temperature()
            return {
                'count': {
                    'physical': psutil.cpu_count(logical=False),
                    'logical': psutil.cpu_count(logical=True),
                },
                'utilization': psutil.cpu_percent(interval=1),  # %
                # psutil reports MHz; dividing by 1000 gives GHz.
                'frequency': {
                    'current': cpu_freq.current / 1000 if cpu_freq else None,  # GHz
                    'min': cpu_freq.min / 1000 if cpu_freq else None,          # GHz
                    'max': cpu_freq.max / 1000 if cpu_freq else None,          # GHz
                },
                'temperature': cpu_temp,  # °C
                'memory': self._get_memory_info(),
                'swap': self._get_swap_info(),
            }
        except Exception as e:
            self.logger.error("获取CPU信息失败: %s", e)
            return {}

    def _get_memory_info(self) -> Dict:
        """Return physical memory usage in MB, or ``{}`` on failure."""
        try:
            return self._usage_in_mb(psutil.virtual_memory())
        except Exception as e:
            self.logger.error("获取内存信息失败: %s", e)
            return {}

    def _get_swap_info(self) -> Dict:
        """Return swap usage in MB, or ``{}`` on failure."""
        try:
            return self._usage_in_mb(psutil.swap_memory())
        except Exception as e:
            self.logger.error("获取交换内存信息失败: %s", e)
            return {}

    def _get_disk_info(self) -> Dict:
        """Return disk usage in MB keyed by mount point.

        Partitions whose usage cannot be read (``PermissionError`` or any
        other ``OSError``) are skipped. ``{}`` is returned when the partition
        list itself cannot be obtained.
        """
        try:
            disk_info = {}
            for partition in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                except OSError:  # PermissionError is a subclass of OSError
                    continue
                disk_info[partition.mountpoint] = self._usage_in_mb(usage)
            return disk_info
        except Exception as e:
            self.logger.error("获取磁盘信息失败: %s", e)
            return {}

    def _get_process_info(self) -> Dict:
        """Return the total process count and the running/sleeping counts.

        Processes in any other state are included in ``'total'`` only.
        ``{}`` is returned when the process table cannot be read.
        """
        try:
            processes = {
                'total': len(psutil.pids()),
                'running': 0,
                'sleeping': 0,
            }
            for proc in psutil.process_iter(['status']):
                try:
                    status = proc.info['status']
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue
                if status in ('running', 'sleeping'):
                    processes[status] += 1
            return processes
        except Exception as e:
            self.logger.error("获取进程信息失败: %s", e)
            return {}

    def get_system_resources(self) -> Dict:
        """Collect a snapshot of all monitored system resources.

        Returns:
            On success, ``{'status': 'success', 'resources': {...},
            'timestamp': 'YYYY-MM-DDTHH:MM:SS'}`` where ``resources`` has the
            keys ``'gpu'``, ``'cpu'``, ``'disk'`` and ``'processes'``.
            On failure, ``{'status': 'error', 'message': ..., 'details':
            {'error_type': ..., 'error_message': ...}}``.
        """
        try:
            resources = {
                'gpu': self._get_gpu_info(),
                'cpu': self._get_cpu_info(),
                'disk': self._get_disk_info(),
                'processes': self._get_process_info(),
            }
            self.logger.info("成功获取系统资源信息")
            return {
                'status': 'success',
                'resources': resources,
                'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
            }
        except Exception as e:
            self.logger.error("获取系统资源信息失败: %s", e)
            return {
                'status': 'error',
                'message': '获取资源信息失败',
                'details': {
                    'error_type': type(e).__name__,
                    'error_message': str(e),
                },
            }