56 lines
1.7 KiB
Python
56 lines
1.7 KiB
Python
from function.model_trainer import ModelTrainer
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# Create the trainer instance.
trainer = ModelTrainer()

# Load the training and validation splits from their CSV files.
train_data = pd.read_csv(
    '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/train_breast_cancer_20250219_144629.csv'
)
val_data = pd.read_csv(
    '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/val_breast_cancer_20250219_144629.csv'
)

# Prepare features and labels: the 'target' column holds the labels, every
# other column is kept as a feature.
X_train, y_train = train_data.drop(columns='target'), train_data['target']
X_val, y_val = val_data.drop(columns='target'), val_data['target']
|
|
|
|
# Model configuration: algorithm name, task type, dataset directory and the
# hyper-parameters stored under 'params'.
model_config = dict(
    algorithm='XGBClassifier',
    task_type='classification',
    dataset='/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629',
    params=dict(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=6,
        random_state=42,
    ),
)
|
|
|
|
# Train the model. When deleting a training experiment, the files in the
# mlruns/.trash/ recycle bin have to be deleted as well.
# Model files are stored directly under the mlruns/ folder.
# NOTE(review): the original indentation was lost in this copy; the reporting
# below is placed inside the loop, which is equivalent to running it after the
# loop as long as the loop executes exactly once (range(3, 4)).
for i in range(3, 4):
    train_split = dict(features=X_train, labels=y_train)
    val_split = dict(features=X_val, labels=y_val)
    run_label = f'breast_cancer_classification_{i}'
    result = trainer.train_model(train_split, val_split, model_config, run_label)

    # Print the result.
    print("\n训练结果:")
    status = result['status']
    print(f"状态: {status}")

    # Failure path first: show the reported message and move on.
    if status != 'success':
        print(f"错误信息: {result['message']}")
        continue

    # Success path: show the MLflow run id followed by every metric,
    # formatted to four decimal places.
    print(f"\nMLflow运行ID: {result['run_id']}")
    print("\n评估指标:")
    for metric_name, metric_value in result['metrics'].items():
        print(f"{metric_name}: {metric_value:.4f}")