修改--修改文档

This commit is contained in:
haotian 2025-04-10 14:16:46 +08:00
parent 0310f24840
commit 11b8eb53a5
4 changed files with 24 additions and 6 deletions

View File

@ -316,7 +316,8 @@ TEP数据集共包含53个特征列
1. **完整性检查**:确认数据集无缺失值
2. **异常值处理**:使用IsolationForest方法识别并移除异常值
3. **数据标准化**:采用StandardScaler方法将特征标准化为零均值、单位方差
4. **数据集划分**按8:2比例划分训练集和测试集
4. **特征提取**: 使用PCA将特征维度从53降低到20
5. **数据集划分**:按8:2比例划分训练集和测试集
**预处理配置**
```json
@ -327,7 +328,7 @@ TEP数据集共包含53个特征列
{
"method_name": "IsolationForest",
"params": {
"n_estimators": 100,
"n_estimators": 200,
"max_samples": "auto"
}
},
@ -339,7 +340,14 @@ TEP数据集共包含53个特征列
}
}
],
"feature_methods": [],
"feature_methods": [
{
"method_name":"PCA",
"params":{
"n_components": 20
}
}
],
"split_params": {
"train_size": 0.8,
"val_size": 0.2

View File

@ -151,7 +151,14 @@ Request:
}
}
],
"feature_methods": [],
"feature_methods": [
{
"method_name":"PCA",
"params":{
"n_components": 20
}
}
],
"split_params": {
"train_size": 0.8,
"val_size": 0.2

View File

@ -336,7 +336,10 @@ class DataManager:
try:
method_name = method['method_name']
params = method.get('params', {})
columns = method.get('columns', df.drop('target', axis=1).columns) # 排除target列
df_columns = df.columns[:-1]
columns = method.get('columns', df_columns) # 排除target列
if method_name not in self.feature_engineering_methods:
raise ValueError(f"Unknown feature engineering method: {method_name}")
@ -445,7 +448,7 @@ class DataManager:
train_data, val_data = train_test_split(
train_val_data,
test_size=val_size_adjusted,
random_state=42
random_state=random_state
)
else:
train_data = train_val_data