From 11b8eb53a547afedea7611a5a5f9fdb190162451 Mon Sep 17 00:00:00 2001 From: haotian <2421912570@qq.com> Date: Thu, 10 Apr 2025 14:16:46 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9--=E4=BF=AE=E6=94=B9=E6=96=87?= =?UTF-8?q?=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/典型示例新版.md | 14 +++++++++++--- doc/接口文档code.md | 9 ++++++++- .../__pycache__/data_manager.cpython-39.pyc | Bin 18254 -> 18260 bytes function/data_manager.py | 7 +++++-- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/典型示例新版.md b/doc/典型示例新版.md index b512d97..6725480 100644 --- a/doc/典型示例新版.md +++ b/doc/典型示例新版.md @@ -316,7 +316,8 @@ TEP数据集共包含53个特征列: 1. **完整性检查**:确认数据集无缺失值 2. **异常值处理**:使用IsolationForest方法识别并移除异常值 3. **数据标准化**:采用StandardScaler方法将特征标准化到[-1, 1]区间 -4. **数据集划分**:按8:2比例划分训练集和测试集 +4. **特征提取**: 使用PCA将特征维度从53降低到20 +5. **数据集划分**:按8:2比例划分训练集和测试集 **预处理配置**: ```json @@ -327,7 +328,7 @@ TEP数据集共包含53个特征列: { "method_name": "IsolationForest", "params": { - "n_estimators": 100, + "n_estimators": 200, "max_samples": "auto" } }, @@ -339,7 +340,14 @@ TEP数据集共包含53个特征列: } } ], - "feature_methods": [], + "feature_methods": [ + { + "method_name":"PCA", + "params":{ + "n_components": 20 + } + } + ], "split_params": { "train_size": 0.8, "val_size": 0.2 diff --git a/doc/接口文档code.md b/doc/接口文档code.md index 2b9a739..f2e319e 100644 --- a/doc/接口文档code.md +++ b/doc/接口文档code.md @@ -151,7 +151,14 @@ Request: } } ], - "feature_methods": [], + "feature_methods": [ + { + "method_name":"PCA", + "params":{ + "n_components": 20 + } + } + ], "split_params": { "train_size": 0.8, "val_size": 0.2 diff --git a/function/__pycache__/data_manager.cpython-39.pyc b/function/__pycache__/data_manager.cpython-39.pyc index c18e45b2cc126207487287f93e6c90bfb715ca65..c1e1bafd78477cb353edd0b53bc2d1faa896f8a3 100644 GIT binary patch delta 1014 zcmah|-Afcv6u;-r?9S}R%=nROiz0ru*%qvzk|;$fC`Cc|P)Q=yb+yq(8&@g2<_UW1)AP;M$p z5b@`J$FH&Tn@WG(h{ks=B=C1;3nP@r^Wzi~h$2g%xPS=0gNoP^Dd<21A}(Qm;`2^? z)K|pIhzb#1a;C@T1h$O>>&p1|fO8}*^#h!wPP!A}D;7)3>J*B*55wb4^Ti?9qS$%d4y z2Qt{)O$GIk4LOv~K@yVy31(%NCIeK`!z8pSEQ1YPLedo*k?<;55>HX7L<3`(C`4Vw zmXrshk(B)D)=7RX;0J&yMAbhhX2SP zCT(nR;Yv?<_n19OWT~kV&Y{w=Vikp6#l#=t@1J-RB-_kms=W_GW0 z@_y&s-(OLvBoWF!)<=5Z0E`9!Fz$w=8^(AT3Xm zkXbeshX8vQi?g4xZ?NHP9ls&Kj#GZ}vjCWB?fp(|Vg(@KylET7aLLiy;{ep@?pOqP n>U`)N0LVDf=`VhGqrMu)lO~4A+D3<~xOlAppY(fUo6mL_eAhQ!ICsUakU!$0m zH5#S}%N)29Gtw~kBFTc$$mJ9Xker++rh=$vjv6mhWSC6Vnre#XTwLL5faJl>1LX39 z$UqUzLmHw=3Zkm+PEcbv>e0O^NRlLkDf2RIRk2wTap1D9G2gnBh7_QLtV4>hvUMz*!E~pnUdAfsD4IT%q+wu{B@r+^5g)6wg}2#Bcxi5)7SHLT9w8WxDwaxDS4KaM;ASs|y% z1YxSFEo%{XG53%jM8-)+y=Z6-Ru~yvTH}r#HtdiUW6d41F@^O>vC#YsE{e{U6XJSH z8e+oJdI7k&+PVxkus$yO+Cp$!oNJqw&p4Pde@VP;8xhs*z4bn$&n8RS;k+Hfc9_8+ z=Gy1A7#4&&Kmjk*S>M{50M|o!f(>1`DonZpMkFe6B>@q9MA(*#s|9owQMH6BVmx>h zRIw2Zfm^%?hN!V6J_kR-L+km$NeSLt{?6AD{Q=ge?hgQ6*5lr33S(lYdqhkg{sK$Z z;E@&Z+{6cKY@(r9Nc8smt*?DSfNPf4|3-#;*7mVg3SdnP9-wG-afks-T06r_^d~5^ B3WNXv diff --git a/function/data_manager.py b/function/data_manager.py index 6be6719..daf5611 100644 --- a/function/data_manager.py +++ b/function/data_manager.py @@ -336,7 +336,10 @@ class DataManager: try: method_name = method['method_name'] params = method.get('params', {}) - columns = method.get('columns', df.drop('target', axis=1).columns) # 排除target列 + + df_columns = df.columns[:-1] + + columns = method.get('columns', df_columns) # 排除target列 if method_name not in self.feature_engineering_methods: raise ValueError(f"Unknown feature engineering method: {method_name}") @@ -445,7 +448,7 @@ class DataManager: train_data, val_data = train_test_split( train_val_data, test_size=val_size_adjusted, - random_state=42 + random_state=random_state ) else: train_data = train_val_data