diff --git a/doc/时间计划.md b/doc/时间计划.md index 8de5b64..ec50f24 100644 --- a/doc/时间计划.md +++ b/doc/时间计划.md @@ -20,14 +20,15 @@ ## 4. 监控系统 (1) - 20250226 - [x] 资源监控 - - [ ] 训练监控 - - [ ] 告警系统 - - [ ] 日志聚合 + - [ ] 训练监控 --暂无等后台起来再做 + - [ ] 告警系统 --暂无 + - [x] 日志聚合 - 1 天 ## 5. FastAPI后端服务 (2) - 20250221~20250224 - - [ ] API路由设计 + - [x] 整合系统方法 + - [x] API路由设计 - [ ] 请求/响应模型 - [ ] 异步处理支持 - [ ] API文档生成 diff --git a/example_data_processor.py b/example_data_processor.py deleted file mode 100644 index 085fc7b..0000000 --- a/example_data_processor.py +++ /dev/null @@ -1,117 +0,0 @@ -from function.data_processor_date import DataProcessor -import numpy as np - -# 创建处理器实例 -processor = DataProcessor() - -# 定义数据预处理方法 -process_methods = [ - # 缺失值处理 - { - 'method_name': 'SimpleImputer', - 'params': { - 'strategy': 'mean', - 'missing_values': np.nan - } - }, - # 异常值检测 - { - 'method_name': 'IsolationForest', - 'params': { - 'contamination': 0.1, - 'random_state': 42 - } - }, - # 数据标准化 - { - 'method_name': 'StandardScaler', - 'params': { - 'with_mean': True, - 'with_std': True - } - } -] - -# 定义特征工程方法 -feature_methods = [ - # 类别特征编码 - { - 'method_name': 'OneHotEncoder', - 'params': { - 'sparse': False, - 'handle_unknown': 'ignore' - }, - - # columns 要处理的列名 - 'columns': ['categorical_feature1', 'categorical_feature2'] - }, - # 数值特征离散化 - { - 'method_name': 'KBinsDiscretizer', - 'params': { - 'n_bins': 5, - 'encode': 'onehot', - 'strategy': 'uniform' - }, - - # columns 要处理的列名 - 'columns': ['numeric_feature1', 'numeric_feature2'] - }, - # 特征选择 - { - 'method_name': 'SelectKBest', - 'params': { - 'k': 10, - 'score_func': 'f_classif' - }, - - # columns 要处理的列名 - 'columns': ['feature1', 'feature2', 'feature3', 'feature4'] - }, - # 降维 - { - 'method_name': 'PCA', - 'params': { - 'n_components': 2, - 'random_state': 42 - }, - - # columns 要处理的列名, 现并不能通过列的序号来获得列, 注意文件的列名 - 'columns': ['feature5', 'feature6', 'feature7'] - } -] - -# 数据集划分参数 -split_params = { - # 'test_size': 0, - 'val_size': 0.3 -} - -# 处理数据集 -result = processor.process_dataset( - input_path='dataset/dataset_raw/breast_cancer.csv', - output_dir='dataset/dataset_processed', - process_methods=process_methods, - # feature_methods=feature_methods, - feature_methods=[], - split_params=split_params -) - -# 打印处理结果 -print("\n数据处理结果:") -print(f"状态: {result['status']}") -if result['status'] == 'success': - print("\n处理记录:") - record = result['process_record'] - print(f"输入文件: {record['input_file']}") - print(f"处理时间: {record['timestamp']}") - print("\n输出文件:") - for key, path in record['output_files'].items(): - print(f"{key}: {path}") - - print("\n处理步骤:") - for step in record['steps']: - if 'shape' in step: - print(f"{step['step']}: 数据形状 {step['shape']}") -else: - print(f"错误信息: {result['message']}") \ No newline at end of file diff --git a/example_experiment_list.py b/example_experiment_list.py deleted file mode 100644 index 14eaf7d..0000000 --- a/example_experiment_list.py +++ /dev/null @@ -1,34 +0,0 @@ -from function.model_manager import ModelManager - -# 创建模型管理器实例 -manager = ModelManager() - -# 获取实验列表 -result = manager.get_experiments( - page=2, - page_size=10, - include_deleted=False -) - -# 打印结果 -print("\nMLFlow实验列表:") -print(f"状态: {result['status']}") -if result['status'] == 'success': - print(f"\n总数: {result['total_count']}") - print(f"当前页: {result['page']}") - print(f"每页数量: {result['page_size']}") - print("\n实验列表:") - for exp in result['experiments']: - print(f"\n实验ID: {exp['experiment_id']}") - print(f"名称: {exp['name']}") - print(f"存储位置: {exp['artifact_location']}") - print(f"状态: {exp['lifecycle_stage']}") - print(f"创建时间: {exp['creation_time']}") - print(f"最后更新: {exp['last_update_time']}") - print(f"运行次数: {exp['runs_count']}") - if exp['tags']: - print("标签:") - for tag_name, tag_value in exp['tags'].items(): - print(f" {tag_name}: {tag_value}") -else: - print(f"错误信息: {result['message']}") \ No newline at end of file diff --git a/example_get_all_dataset.py b/example_get_all_dataset.py deleted file mode 100644 index db1a727..0000000 --- a/example_get_all_dataset.py +++ /dev/null @@ -1,5 +0,0 @@ -from function.get_all_dataset import DatasetHistory - - -t = DatasetHistory() -print(t.get_dataset()) \ No newline at end of file diff --git a/example_method_reader_date_feature.py b/example_method_reader_date_feature.py deleted file mode 100644 index 5db4092..0000000 --- a/example_method_reader_date_feature.py +++ /dev/null @@ -1,14 +0,0 @@ -from function.method_reader_date_feature import MethodReader - -# 创建方法读取器实例 -reader = MethodReader() - -# 获取所有预处理方法 -methods = reader.get_preprocessing_methods() -print("预处理方法列表:") -print(methods) - -# 获取特定方法的详细信息 -method_details = reader.get_method_details('KBinsDiscretizer') -print("\nKBinsDiscretizer方法详情:") -print(method_details) \ No newline at end of file diff --git a/example_method_reader_date_process.py b/example_method_reader_date_process.py deleted file mode 100644 index 67bc139..0000000 --- a/example_method_reader_date_process.py +++ /dev/null @@ -1,14 +0,0 @@ -from function.method_reader_date_process import MethodReader - -# 创建方法读取器实例 -reader = MethodReader() - -# 获取所有预处理方法 -methods = reader.get_preprocessing_methods() -print("预处理方法列表:") -print(methods) - -# 获取特定方法的详细信息 -method_details = reader.get_method_details('StandardScaler') -print("\nStandardScaler方法详情:") -print(method_details) \ No newline at end of file diff --git a/example_method_reader_metric.py b/example_method_reader_metric.py deleted file mode 100644 index 80196bc..0000000 --- a/example_method_reader_metric.py +++ /dev/null @@ -1,9 +0,0 @@ - -from function.method_reader_metric import MethodReader - - - -method_reader = MethodReader() - - -print(method_reader.get_metrics()) \ No newline at end of file diff --git a/example_method_reader_model.py b/example_method_reader_model.py deleted file mode 100644 index d459f57..0000000 --- a/example_method_reader_model.py +++ /dev/null @@ -1,14 +0,0 @@ -from function.method_reader_model import MethodReader - -# 创建方法读取器实例 -reader = MethodReader() - -# 获取所有预处理方法 -methods = reader.get_models() -print("模型列表:") -print(methods) - -# 获取特定方法的详细信息 -method_details = reader.get_model_details('SVC') -print("\nSVC方法详情:") -print(method_details) \ No newline at end of file diff --git a/example_model_delete.py b/example_model_delete.py deleted file mode 100644 index 4dc9e98..0000000 --- a/example_model_delete.py +++ /dev/null @@ -1,6 +0,0 @@ -from function.model_manager import ModelManager - -# 创建模型管理器实例 -manager = ModelManager() -back = manager.delete_model('7970364d490f4e0aa0375c2db26215f3') -print(back) \ No newline at end of file diff --git a/example_model_predict.py b/example_model_predict.py deleted file mode 100644 index 68bcff2..0000000 --- a/example_model_predict.py +++ /dev/null @@ -1,8 +0,0 @@ -from function.model_manager import ModelManager - -model_manager = ModelManager() - -print(model_manager.predict(run_id = "33939ea6d8ce4d43a268f23f7361651e",\ - data_path="/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_145614/test_breast_cancer_20250219_145614.csv",\ - output_path="predictions/pred_breast_cancer_20250219_145614.csv" ,\ - metrics= ["accuracy", "f1", "precision", "recall"] )) \ No newline at end of file diff --git a/example_model_trainer.py b/example_model_trainer.py deleted file mode 100644 index ed0bc7b..0000000 --- a/example_model_trainer.py +++ /dev/null @@ -1,56 +0,0 @@ -from function.model_trainer import ModelTrainer -import pandas as pd -import numpy as np - -# 创建训练器实例 -trainer = ModelTrainer() - -# 加载数据 -train_data = pd.read_csv('/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/train_breast_cancer_20250219_144629.csv') -val_data = pd.read_csv('/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/val_breast_cancer_20250219_144629.csv') - -# 准备特征和标签 -X_train = train_data.drop('target', axis=1) -y_train = train_data['target'] -X_val = val_data.drop('target', axis=1) -y_val = val_data['target'] - -# 模型配置 -model_config = { - 'algorithm': 'XGBClassifier', - 'task_type': 'classification', - 'dataset' : '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629', - 'params': { - 'n_estimators': 100, - 'learning_rate': 0.1, - 'max_depth': 6, - 'random_state': 42 - } -} - -# 训练模型, 删除训练实验时要删除 mlruns/.trash/ 回收站里的文件 -# 模型文件 直接在 mlruns/文件夹下 -for i in range(3, 4): - result = trainer.train_model( - { - 'features': X_train, - 'labels': y_train - }, - { - 'features': X_val, - 'labels': y_val - }, - model_config, - f'breast_cancer_classification_{i}' - ) - -# 打印结果 -print("\n训练结果:") -print(f"状态: {result['status']}") -if result['status'] == 'success': - print(f"\nMLflow运行ID: {result['run_id']}") - print("\n评估指标:") - for metric_name, metric_value in result['metrics'].items(): - print(f"{metric_name}: {metric_value:.4f}") -else: - print(f"错误信息: {result['message']}") \ No newline at end of file