修改--修改时间计划
This commit is contained in:
parent
48d30f560d
commit
c4f09c9028
@ -20,14 +20,15 @@
|
||||
## 4. 监控系统 (1)
|
||||
- 20250226
|
||||
- [x] 资源监控
|
||||
- [ ] 训练监控
|
||||
- [ ] 告警系统
|
||||
- [ ] 日志聚合
|
||||
- [ ] 训练监控 -- 暂无，等后台起来再做
|
||||
- [ ] 告警系统 --暂无
|
||||
- [x] 日志聚合
|
||||
- 1 天
|
||||
|
||||
## 5. FastAPI后端服务 (2)
|
||||
- 20250221~20250224
|
||||
- [ ] API路由设计
|
||||
- [x] 整合系统方法
|
||||
- [x] API路由设计
|
||||
- [ ] 请求/响应模型
|
||||
- [ ] 异步处理支持
|
||||
- [ ] API文档生成
|
||||
|
||||
@ -1,117 +0,0 @@
|
||||
from function.data_processor_date import DataProcessor
|
||||
import numpy as np
|
||||
|
||||
# Demo script: run the project's DataProcessor over the breast-cancer CSV
# and print the processing record it returns.
processor = DataProcessor()

# Preprocessing steps passed to process_dataset().
process_methods = [
    # Missing-value imputation
    {
        'method_name': 'SimpleImputer',
        'params': {
            'strategy': 'mean',
            'missing_values': np.nan,
        },
    },
    # Outlier detection
    {
        'method_name': 'IsolationForest',
        'params': {
            'contamination': 0.1,
            'random_state': 42,
        },
    },
    # Standardization
    {
        'method_name': 'StandardScaler',
        'params': {
            'with_mean': True,
            'with_std': True,
        },
    },
]

# Feature-engineering steps. Defined for reference; the call below passes an
# empty list instead, so none of these are applied in this run.
feature_methods = [
    # Categorical feature encoding
    {
        'method_name': 'OneHotEncoder',
        'params': {
            # NOTE(review): if these params are forwarded to scikit-learn's
            # OneHotEncoder, `sparse` was renamed to `sparse_output` in
            # scikit-learn 1.2 -- confirm against the installed version.
            'sparse': False,
            'handle_unknown': 'ignore',
        },
        # Names of the columns to process
        'columns': ['categorical_feature1', 'categorical_feature2'],
    },
    # Discretization of numeric features
    {
        'method_name': 'KBinsDiscretizer',
        'params': {
            'n_bins': 5,
            'encode': 'onehot',
            'strategy': 'uniform',
        },
        # Names of the columns to process
        'columns': ['numeric_feature1', 'numeric_feature2'],
    },
    # Feature selection
    {
        'method_name': 'SelectKBest',
        'params': {
            'k': 10,
            # Given as a string name; presumably resolved to the scoring
            # function by DataProcessor -- verify.
            'score_func': 'f_classif',
        },
        # Names of the columns to process
        'columns': ['feature1', 'feature2', 'feature3', 'feature4'],
    },
    # Dimensionality reduction
    {
        'method_name': 'PCA',
        'params': {
            'n_components': 2,
            'random_state': 42,
        },
        # Names of the columns to process. Columns cannot currently be
        # selected by position, so use the column names found in the file.
        'columns': ['feature5', 'feature6', 'feature7'],
    },
]

# Dataset split parameters. Only val_size is set; test_size is not specified.
split_params = {
    'val_size': 0.3,
}

# Process the dataset.
result = processor.process_dataset(
    input_path='dataset/dataset_raw/breast_cancer.csv',
    output_dir='dataset/dataset_processed',
    process_methods=process_methods,
    # Feature engineering is switched off here: an empty list is passed
    # instead of feature_methods.
    feature_methods=[],
    split_params=split_params,
)

# Print the outcome.
print("\n数据处理结果:")
print(f"状态: {result['status']}")
if result['status'] == 'success':
    print("\n处理记录:")
    record = result['process_record']
    print(f"输入文件: {record['input_file']}")
    print(f"处理时间: {record['timestamp']}")
    print("\n输出文件:")
    for key, path in record['output_files'].items():
        print(f"{key}: {path}")

    print("\n处理步骤:")
    for step in record['steps']:
        # Only steps that recorded a data shape are reported.
        if 'shape' in step:
            print(f"{step['step']}: 数据形状 {step['shape']}")
else:
    print(f"错误信息: {result['message']}")
|
||||
@ -1,34 +0,0 @@
|
||||
from function.model_manager import ModelManager
|
||||
|
||||
# Demo script: fetch one page of MLflow experiments through the project's
# ModelManager and print every field of each experiment.
manager = ModelManager()

# Request the second page, ten experiments per page, skipping deleted ones.
result = manager.get_experiments(
    page=2,
    page_size=10,
    include_deleted=False,
)

# Print the outcome.
print("\nMLFlow实验列表:")
print(f"状态: {result['status']}")
if result['status'] == 'success':
    print(f"\n总数: {result['total_count']}")
    print(f"当前页: {result['page']}")
    print(f"每页数量: {result['page_size']}")
    print("\n实验列表:")
    for exp in result['experiments']:
        print(f"\n实验ID: {exp['experiment_id']}")
        print(f"名称: {exp['name']}")
        print(f"存储位置: {exp['artifact_location']}")
        print(f"状态: {exp['lifecycle_stage']}")
        print(f"创建时间: {exp['creation_time']}")
        print(f"最后更新: {exp['last_update_time']}")
        print(f"运行次数: {exp['runs_count']}")
        # Tags are listed only when the experiment has any.
        if exp['tags']:
            print("标签:")
            for tag_name, tag_value in exp['tags'].items():
                print(f" {tag_name}: {tag_value}")
else:
    print(f"错误信息: {result['message']}")
|
||||
@ -1,5 +0,0 @@
|
||||
from function.get_all_dataset import DatasetHistory
|
||||
|
||||
|
||||
# Demo script: print whatever DatasetHistory.get_dataset() returns.
history = DatasetHistory()
print(history.get_dataset())
|
||||
@ -1,14 +0,0 @@
|
||||
from function.method_reader_date_feature import MethodReader
|
||||
|
||||
# Demo script: list the methods exposed by the feature-module MethodReader
# and show the details of one of them.
reader = MethodReader()

# Fetch every method returned by get_preprocessing_methods().
# NOTE(review): the printed label says "preprocessing methods" although this
# reader is imported from the feature module -- confirm the label is intended.
methods = reader.get_preprocessing_methods()
print("预处理方法列表:")
print(methods)

# Fetch the details of one specific method.
method_details = reader.get_method_details('KBinsDiscretizer')
print("\nKBinsDiscretizer方法详情:")
print(method_details)
|
||||
@ -1,14 +0,0 @@
|
||||
from function.method_reader_date_process import MethodReader
|
||||
|
||||
# Demo script: list the preprocessing methods exposed by MethodReader and
# show the details of one of them.
reader = MethodReader()

# Fetch every preprocessing method.
methods = reader.get_preprocessing_methods()
print("预处理方法列表:")
print(methods)

# Fetch the details of one specific method.
method_details = reader.get_method_details('StandardScaler')
print("\nStandardScaler方法详情:")
print(method_details)
|
||||
@ -1,9 +0,0 @@
|
||||
|
||||
from function.method_reader_metric import MethodReader
|
||||
|
||||
|
||||
|
||||
# Demo script: print whatever MethodReader.get_metrics() returns.
method_reader = MethodReader()
metrics = method_reader.get_metrics()
print(metrics)
|
||||
@ -1,14 +0,0 @@
|
||||
from function.method_reader_model import MethodReader
|
||||
|
||||
# Demo script: list the models exposed by MethodReader and show the details
# of one of them.
reader = MethodReader()

# Fetch every available model.
methods = reader.get_models()
print("模型列表:")
print(methods)

# Fetch the details of one specific model.
method_details = reader.get_model_details('SVC')
print("\nSVC方法详情:")
print(method_details)
|
||||
@ -1,6 +0,0 @@
|
||||
from function.model_manager import ModelManager
|
||||
|
||||
# Demo script: delete one model by its identifier and print what
# ModelManager.delete_model() returns.
manager = ModelManager()
result = manager.delete_model('7970364d490f4e0aa0375c2db26215f3')
print(result)
|
||||
@ -1,8 +0,0 @@
|
||||
from function.model_manager import ModelManager
|
||||
|
||||
# Demo script: run a prediction for one MLflow run against a processed test
# CSV and print what ModelManager.predict() returns.
model_manager = ModelManager()

# Arguments sit inside parentheses, so no backslash continuations are needed.
prediction = model_manager.predict(
    run_id="33939ea6d8ce4d43a268f23f7361651e",
    data_path="/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_145614/test_breast_cancer_20250219_145614.csv",
    output_path="predictions/pred_breast_cancer_20250219_145614.csv",
    metrics=["accuracy", "f1", "precision", "recall"],
)
print(prediction)
|
||||
@ -1,56 +0,0 @@
|
||||
from function.model_trainer import ModelTrainer
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Demo script: train an XGBClassifier on the processed breast-cancer split
# through the project's ModelTrainer and print the returned metrics.
trainer = ModelTrainer()

# The processed dataset directory and its file names share one timestamped
# name, so every path is built from these two constants.
DATASET_NAME = 'breast_cancer_20250219_144629'
DATASET_DIR = f'/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/{DATASET_NAME}'

# Load the data.
train_data = pd.read_csv(f'{DATASET_DIR}/train_{DATASET_NAME}.csv')
val_data = pd.read_csv(f'{DATASET_DIR}/val_{DATASET_NAME}.csv')

# Split features and labels; the label column is named 'target'.
X_train = train_data.drop('target', axis=1)
y_train = train_data['target']
X_val = val_data.drop('target', axis=1)
y_val = val_data['target']

# Model configuration.
model_config = {
    'algorithm': 'XGBClassifier',
    'task_type': 'classification',
    'dataset': DATASET_DIR,
    'params': {
        'n_estimators': 100,
        'learning_rate': 0.1,
        'max_depth': 6,
        'random_state': 42,
    },
}

# Train the model. When deleting a training experiment, the files in the
# mlruns/.trash/ recycle bin must be deleted as well.
# Model files are stored directly under the mlruns/ folder.
for i in range(3, 4):
    result = trainer.train_model(
        {
            'features': X_train,
            'labels': y_train,
        },
        {
            'features': X_val,
            'labels': y_val,
        },
        model_config,
        f'breast_cancer_classification_{i}',
    )

    # Report inside the loop so every run is printed and `result` is never
    # read when the range is empty.
    print("\n训练结果:")
    print(f"状态: {result['status']}")
    if result['status'] == 'success':
        print(f"\nMLflow运行ID: {result['run_id']}")
        print("\n评估指标:")
        for metric_name, metric_value in result['metrics'].items():
            # assumes every metric value is numeric -- TODO confirm
            print(f"{metric_name}: {metric_value:.4f}")
    else:
        print(f"错误信息: {result['message']}")
|
||||
Loading…
Reference in New Issue
Block a user