118 lines
4.8 KiB
Python
118 lines
4.8 KiB
Python
from function.model_manager import ModelManager
|
|
import pandas as pd
|
|
|
|
# Build the ModelManager instance that every later section of this script uses.
manager = ModelManager()

# Section: print whatever manager.get_models() returns.
# NOTE(review): the banner text below reads "preprocessing methods" although
# the call made here is get_models() -- confirm which wording is intended.
print("--------------------------------------------获取预处理方法---------------------------------------------------")
methods = manager.get_models()
# One print call with a newline separator emits the heading and the value on
# separate lines.
print("模型列表:", methods, sep="\n")
print("--------------------------------------------获取预处理方法 end ---------------------------------------------------")
|
|
|
|
|
|
print("--------------------------------------------获取方法详细信息---------------------------------------------------")
# Section: fetch and print the details of one specific entry, looked up by name.
method_details = manager.get_model_details('LinearRegression')
# Heading (preceded by a blank line) and the returned details on the next line.
print("\nLinearRegression方法详情:", method_details, sep="\n")
print("--------------------------------------------获取方法详细信息 end ---------------------------------------------------")
|
|
|
|
|
|
|
|
print("--------------------------------------------评价指标 ---------------------------------------------------")
# Section: print the evaluation metrics reported by the manager.
available_metrics = manager.get_metrics()
print(available_metrics)
print("--------------------------------------------评价指标 end ---------------------------------------------------")
|
|
|
|
|
|
print("--------------------------------------------获取所有已训练模型 ---------------------------------------------------")
# Section: request the first page (10 entries per page) of finished models for
# one experiment and print the response.
result = manager.get_finished_models(
    page=1,
    page_size=10,
    experiment_name='breast_cancer_classification_3',
)

# Report the response: the status first, then either the error message or the
# paging summary followed by one record per model.
print("\n已训练模型列表:")
status = result['status']
print(f"状态: {status}")
if status != 'success':
    print(f"错误信息: {result['message']}")
else:
    print(f"\n总数: {result['total_count']}")
    print(f"当前页: {result['page']}")
    print(f"每页数量: {result['page_size']}")
    print("\n模型列表:")
    for entry in result['models']:
        # Each entry is indexed by 'run_id' and 'experiment_id' (printed as
        # "name value" pairs) and by the descriptive keys below.
        print("run_id", entry['run_id'])
        print("experiment_id", entry['experiment_id'])
        print(f"算法: {entry['algorithm']}")
        print(f"任务类型: {entry['task_type']}")
        print(f"数据集: {entry['dataset']}")
        print(f"训练开始时间: {entry['training_start_time']}")
        print(f"训练结束时间: {entry['training_end_time']}")

        print("模型参数:")
        for param_name, param_value in entry['parameters'].items():
            print(f" {param_name}: {param_value}")

        print("评估指标:")
        # The .4f format spec requires every metric value to be numeric.
        for score_name, score_value in entry['metrics'].items():
            print(f" {score_name}: {score_value:.4f}")

print("--------------------------------------------获取所有已训练模型 end ---------------------------------------------------")
|
|
|
|
|
|
print("--------------------------------------------模型训练---------------------------------------------------")

# Training configuration passed to manager.train_model().
# A 'dataset' entry pointing at
# /home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629
# was commented out in the original config and is still omitted here.
model_config = dict(
    algorithm='XGBClassifier',
    task_type='classification',
    params=dict(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=6,
        random_state=42,
    ),
)

# Author's notes: when deleting a training experiment, the files in the
# mlruns/.trash/ recycle bin must be deleted as well; model files are stored
# directly under the mlruns/ folder.
train_csv = '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250224_170615/train_breast_cancer_20250224_170615.csv'
val_csv = '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250224_170615/val_breast_cancer_20250224_170615.csv'

# range(3, 4) yields only 3, so exactly one experiment
# ('breast_cancer_classification_3') is trained.
# NOTE(review): the original indentation was lost; the report is placed inside
# the loop, which is equivalent while the range holds a single value.
for experiment_index in range(3, 4):
    result = manager.train_model(
        train_csv,
        val_csv,
        model_config,
        f'breast_cancer_classification_{experiment_index}',
    )

    # Report the outcome: status first, then either the error message or the
    # run id and metrics.
    print("\n训练结果:")
    outcome = result['status']
    print(f"状态: {outcome}")
    if outcome != 'success':
        print(f"错误信息: {result['message']}")
    else:
        print(f"\nMLflow运行ID: {result['run_id']}")
        print("\n评估指标:")
        # The .4f format spec requires every metric value to be numeric.
        for score_name, score_value in result['metrics'].items():
            print(f"{score_name}: {score_value:.4f}")

print("-------------------------------------------模型训练 end ---------------------------------------------------")
|
|
|
|
print("--------------------------------------------模型预测 ---------------------------------------------------")
# Section: call manager.predict() for a fixed run id on a test CSV, passing an
# output path and a list of metric names, then print whatever it returns.
prediction_output = manager.predict(
    run_id="33939ea6d8ce4d43a268f23f7361651e",
    data_path="/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_145614/test_breast_cancer_20250219_145614.csv",
    output_path="predictions/pred_breast_cancer_20250219_145614.csv",
    metrics=["accuracy", "f1", "precision", "recall"],
)
print(prediction_output)

print("-------------------------------------------模型预测 end ---------------------------------------------------")