From f428efa7db5f5e6e29a449524dd58bde21c327e0 Mon Sep 17 00:00:00 2001
From: haotian <2421912570@qq.com>
Date: Mon, 17 Feb 2025 15:18:25 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90--=E5=AE=8C=E6=88=90=E8=8E=B7?=
 =?UTF-8?q?=E5=8F=96=E6=95=B0=E6=8D=AE=E9=A2=84=E5=A4=84=E7=90=86=E6=96=B9?=
 =?UTF-8?q?=E6=B3=95=E5=88=97=E8=A1=A8,=E8=8E=B7=E5=8F=96=E6=96=B9?=
 =?UTF-8?q?=E6=B3=95=E8=AF=A6=E6=83=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../__pycache__/method_reader.cpython-39.pyc  | Bin 0 -> 2955 bytes
 .../method_reader_date_process.cpython-39.pyc | Bin 0 -> 3757 bytes
 ...eader.py => method_reader_date_process.py} |  58 +++-
 date_feature/parameter.yaml                   | 279 +++++++++++-------
 date_feature/parameter_new.yaml               | 255 ----------------
 example_method_reader.py                      |  14 +
 mlruns/0/meta.yaml                            |   6 +
 tests/test_method_reader.py                   |  49 +++
 8 files changed, 289 insertions(+), 372 deletions(-)
 create mode 100644 data_process/__pycache__/method_reader.cpython-39.pyc
 create mode 100644 data_process/__pycache__/method_reader_date_process.cpython-39.pyc
 rename data_process/{method_reader.py => method_reader_date_process.py} (60%)
 delete mode 100644 date_feature/parameter_new.yaml
 create mode 100644 example_method_reader.py
 create mode 100644 mlruns/0/meta.yaml
 create mode 100644 tests/test_method_reader.py
diff --git a/data_process/__pycache__/method_reader.cpython-39.pyc b/data_process/__pycache__/method_reader.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..20a9060406d8603cd90302757b3773a267bd7fd6
GIT binary patch
literal 2955
zcmaJ@-H#kc5%2E#+}-=K6B{lNXA?ri0=_eOLJ^8UaSRXkDZ&y~Mk2#xdv<Ta?98U8
z*UGNuqToA{D2WLViBW<r?FI{C9&88#5^RIS6aNA&o*=t-{ue-4RQ1eWc8y_1Q_~;S
zRXx?eU)2PQi&X;GJAeJ4{lqdM|G>`dWkBca@MI@IaKdR!Jbbg5c9_RHhG)=X-1JO6
zZ+RBHjo9uuo<qqu2{*a5L%6lgJeSkcq+#CzTV$iblzBXAN@bpmGTETYd@7KeO`My(
z_IGd`MnmFJPCUk`XK==i9pagx#m$S9Tio6;J-h5V+=ZSKy3ns&WW362(68_X@V}@Q
zPK9za<=+ki9*W)}Fg&^N(d0L8-}>SC-J4hUu3aC$d13s{`%OBt5p3gLo`5I24}>H`
zD%ptU#5Zy>G)Dwlmi|K4`!pzwU%fQ`#p~lYE)>T1&2BVo<-}>b9VKnA(hlWGXoo_%
zzMll0(D#*%voLaxAEyELJ9@W%Gfi4iTP@CYF>YBHx2_Pe06zcujrGm66Rrom6D3DQ
zn#%RfAeB*&te-mhR2;}wDmv>tkb&P7X*0~S_0pWs!C33|aUW3<N#93kWF`p07U<y6
zY|z%GI^f?Hq3nrdE?O2W!sR+zzl6qMQv!On=@!lDkd8=JmyENV{*+u|+w>fjMozhb
zSUGRJdEXY3=FrL^x?9E>!flh7WE+r{tbB!J8^;LM_tPMA@|-<%xHF>A2Hn%3Dc$~M
zO14Z&1`q4`3U_5SH@CGn)I3Mdkf*`!mieNYREzg9atuZQAC=<tUK{S+yg7@D$%R)w
zzW%$x5r|$0(F@A}!YP(9T<Zs&cu+6mR4<OJ-iqR|o}{wgN_z>f2ePiH7+5{oI{H-z
zo*O33l!J|p!GouJO+1lSFOK_lJo%8<i_<BM4eozZh*an~STaBPSiMmbcv{L1UjRmB
z>c9!`y^+cj=vl8+X4(xCW#UG?N*1(2jWFTjGBZkAsj58rLNn~jC{2_VqBmt^QZy`4
z10Ance3jn?sw&#@>=KkT)bK0BB4~;V2*@ZTI}Cy_MqRj<*b4mufXQY{^e~-<s^H<;
zUBEtvDI4tUW9skF8NAgdTT~(j=n!&3L`H0Aj0m8N<xD&ZNHZ}@<P0)|f#<%jYt85y
zxgnm>x;A%mz~CLa?u@RPo8mRCTj7AHsV=VpdKWIT5`R|zZ-74w@OP}l=c06T>l6I(
zrJ~Q5agB{wT-mZ-v`&|Ajl~tf;(^|0;6wL*^n1uix8B<sU*6gM@yi7$$FKfmZ}h&p
zN0T{g25~6LOp$rDnZe#curPW1SCjKscW+#oy!FSzNW)MjrW?==v!;lOBex5@sUw{z
z%K&2j*&yzP{$`Ny7`w%E<{i4rJfu=Q@ZB4K7+?PN_@x_#AGMnHWE{cfcqqdr0&CyO
z%KTJX7#QC;H~#QLFfjSqOOxNeuAHpk1r!WBlY#7I$|?6T_<TVKNTL!6F~`c19@%4c
zf$8`P6A!^lw>KIUWyU}ffq+s5tXJmu!+xghpbHG);!~)Kw9xuu6<Q5jb3l9!+TPL~
z5nrALWE!F6>9>^w0uS;IlBiX1JfBLYv8jW0_BaTnj%ggGCfp3~f4k2A>w(5?*8+OM
zI{WtEkyA6JZtZ`%gV@%Jjp`aC@F1HE-Iv!~LjMa44hEbaC*S=ZPz_4pb5IO(hD13_
zgpK{2t&*Isk}W9IpkWMkdLJ>}vf7}jTlRj-!4`Kf>VnN6wIi9Hx1mf_u}tvVB?Ak^
z8IpVpQoPasbD83iVo)sjg0yE-JYOvOd}*Iz%b4zgV*0HqwhR<o={*RaQ&4R3#w+8i
z?-u;peRpT?+Rt~dzx?r^Mw1_0YZ&6QpeG(cQAdHxRHZATq#1SNP!UxP_}L(lK|9RU
zB9F3}?!ADkIBEvZ#-X1z!z2(<nn6~*A1!?e#ZeS%DAqwVO0*t<_P{9)u|BANQb`6!
z=AmDlUL8mlMDY~!;$c|6(Qq`(#Us#B)$bsq_1e3Fyh!xoi>T{1q13fg(f6j{z!Gs+
zBp*a*l?W>La1u;pxB-ALbLm0I{&3N1asSJ_y;Qpw>p%h22a0bjYr%h8+h|mUgPW@Q
zekbKUh@-C-eE<1g5SPDAj_>od>HA^{^aP?;S0T~B7BW~oiUR+WW`wR*2ye8WVLouF
z$$&x!mcA@-yipPOi>Peb??RF(4CAk$VreF@Dk%r+VH`cH>#trk<3RgEFUtK|vGd3A
NXR>s)q6^~!`wv$0M$-TQ

literal 0
HcmV?d00001

diff --git a/data_process/__pycache__/method_reader_date_process.cpython-39.pyc b/data_process/__pycache__/method_reader_date_process.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1e22f73599daca90a3dc564680be60c379661a1e
GIT binary patch
literal 3757
zcmds4ZI2t(6~1@gZSQ)$35gmgi_-)e(<ZyL^esZDQX&14SP?-WMxvv^ckG>ZJmbAH
zj<Ds8Xm)uKkcJPiRUwhQZPc>;khDcD&?N2uxZ*?AV(;cB6sb|qxidEQcHsxqBcHi1
z=bn3g?s=Yb?9R^S34C7u<WG$s93bRh*qQvO(D@;J<5M6QVKg8%zLkJB6<cYlwo21+
z&DP|+ZtL(?1EXo$CM7>6Ok?^wVfvb4Ta2C~72^)rBGrl_v?u&pBDB+foK&dL&brBR
z4d*65;}bjuqaLv-Beue*tulqF>%`VPooQDn)0wfZ*+$kgnFT#F>*ZJ;dX|@ie&LG3
zX4owB^Q;J`nG-W-ykt3IPkAo$cpJPxh8rIbKX~WP%a^uqU*EZLb8vZM@T>Q0bmCap
z#*_UBzVT5I3F*^B*`z(<s6EoxHVL$JXc@9)%3rRuzXGa**RBqJ_r~DOjdYvw2Gxos
z%phts{IFr?8eVc5+8!5{<AiS0a~xsdER5`Tg2-h~Q=Z$YMPc1<h}r4x!BnT^a#)*q
ze56*|J%XN(y`a8Dco9zbS#@zaYI=(<Yx?1F9!1IGvKuA78!nzXeKv5Ddc>QH%uQUU
z#iN=R$BWs<TsqK!DQ`4&qSe9U`=OsW4n|sRfFQ~Y&C~9IGZTJ4G17wL7~D(RJe+d4
z4$IJhbolR~(Opg;9M|Y7?a@BnByl-Wn9`%~k*mrYy+{+aN12MjcS*f`@2Zk$eZ2=4
zw5mQ!n4uAktU>68rAJ6yT_RLI&w=cbpBjCWnVS^apnDE96RUHLl2wh8?om0PV^)&y
zX=`#f)VxTZCC`D~RqcWn=F|5QS%MJ=v0QrXulILu-<}Mu;l`_<-uz2<A@i7*`4g@`
zbLokW+YGwp)KBH~p38MV@XBG7l<QGDWMwxg3(7y!+e!WS_qV99ys#EAuu<(EKG&|{
z1=ZU@&?#fcc&waWR%Wbw|C5|YT+YFg>B*DjtpeA;)xz*z1c=6B{uJz8jgnKivs@{(
zsO5!1!-Lwn*sXgq?6`%?G(W6IBKPErHLsQUQ7Ci|cN1!y@GYIsf{tBCf0fe$><e1A
zjVzj}C!;YHbD+s{AWHC!4}u_yLM?cTN{JqWa59u4JxE8cN}+sW50w00#?;{Q#o;@D
zNa-qzsl$83)EM|_SRFY#qJ}SyEWQt1&G)0Y2Sl}dbgz&a8y-bfc{-|g&z~%FyhHu~
z3gn!LTP{HNFnssEWh!s^$TV?CE*$TX@!Zr!Bk7FE<Dby!zTF_JG(oV_K4cT0Y$|<q
zlK`D4J%xV@C`H5k(o-O}s0i?}u0Elw_Ei28scSH^2V}EL*P77PdK!OS>OwZ^X(L@$
z0GgV)qGUv)cRm54(Sc}|G9sEyte*Zk(Xe9LXLGp5K+-9#>KF8L*;|rM2}ozYeINYL
zonL$i+3U`)*9X_ux8HgtWuL)ozueh;U+kA89@pFevO`J>v0a!LY~$@iWDPG}-@dgq
zeEV;yk%}rbOh}@@ytu~w6x&8>K`b==I0i0p&bvX|bC%tZ1=t;JaA1#lNV^E}?OT5x
zT>InTrCaGf;!xC10v`^?Ac578LB>`}(^FAuU~uc=;G>Vg!0<OO4gdUxFyoW}q0}(q
z#7)|<Ftc-XA53Eu(rkjv*WwfOoAF=_fqFljYfIr3c*%CPk`r10Y{)U7gbM40_A{>&
z3&U*z6*B%+R7L)i`g{Re6+@C8e;C?!ahJ#EMh+QyXm0e|vH^~G*o6~u2sVZ+J8|fd
zZyo{d_;C=(k(zptYVaud{KqoCtOpu*EgcXB>x{eIht5o<=P%q2d|OBlDytMe4;$AM
z+&=pt_z=}Hb1y;R)cpqom@(xE^8B$?$kGt#P(1Zi1j~8C^VsjHhe(eeBC84|E03<^
z0f@9s_-V-Tu;M)^DzB=T<-rmFb;6Q5Vd-hG1Q5+ELH1s`u<|rO4=rtsE$Nvh15Tp#
zDTC5+6G#ABkma*hz5!>GV})^RMz(OpEVfwjikX5^Rt(x(wDa%;l=Z&BN{KlI<@vPF
z_Kl%TGbqcqgfbS)``Z`+DU^qAzB;)6Zi?gWch`4r{C4~1E1&*-bNKTc)r!i$28#S4
z6ki9?HPbsRcki3182oTL&vzH5C&yHp(K$bc_OQqmMC4lB4{Lra@I>Ba=iM-I8(u7C
znIBJd?*S|Xe$9O$@SM2jg)aA_Smei+<BG|TjI8o+W9v~A-$C(R6yF0;$#C}|w7Uz_
zcuhmD49wvA!3e1Q8@S4n(B#LVE%MJG3(2K?1_r(iod_Iz6w9)7v1(AuHX`1!i{qn1
zF$)#&UXa{}7|EcJo$Glp89xRBu&8KIuk3>iW+)adsZ%`-rlBMrKmXS5TC=4Q5+6u!
zF{^6+cb96Vz)jdg<Q=COu{Jo}5i^dn(sqOFpM&W*EUGyU#|nxgJY_w_5tw`l1@aU>
ziQ@B&EL*bp!txuph|!4_)fCGp&KFC?Z*rWe<oLbt5=PQ#0hy-uRUA>Gm?HrQ6AQe+
fe?b<Ma?xJ<2<x%*2RsfuL1Ls>@Rh9eW|aQ|)DI+Z

literal 0
HcmV?d00001

diff --git a/data_process/method_reader.py b/data_process/method_reader_date_process.py
similarity index 60%
rename from data_process/method_reader.py
rename to data_process/method_reader_date_process.py
index 3d01904..378a4e3 100644
--- a/data_process/method_reader.py
+++ b/data_process/method_reader_date_process.py
@@ -11,6 +11,7 @@ class MethodReader:
         """初始化方法读取器"""
         self.logger = logging.getLogger(__name__)
         self.method_config = self._load_method_config()
+        self.parameter_config = self._load_parameter_config()
         
     def _load_method_config(self) -> Dict:
         """加载方法配置文件"""
@@ -29,6 +30,22 @@ class MethodReader:
             self.logger.error(f"Error loading method config: {str(e)}")
             raise
             
+    def _load_parameter_config(self) -> Dict:
+        """加载参数配置文件"""
+        try:
+            config_path = Path('date_preprocessing/parameter.yaml')
+            if not config_path.exists():
+                raise FileNotFoundError(f"Parameter config file not found at {config_path}")
+                
+            with open(config_path, 'r', encoding='utf-8') as f:
+                config = yaml.safe_load(f)
+                
+            self.logger.info("Successfully loaded parameter config")
+            return config
+        except Exception as e:
+            self.logger.error(f"Error loading parameter config: {str(e)}")
+            raise
+            
     def get_preprocessing_methods(self) -> Dict:
         """获取预处理方法列表"""
         try:
@@ -76,22 +93,39 @@ class MethodReader:
     def get_method_details(self, method_name: str) -> Dict:
         """获取指定方法的详细信息"""
         try:
-            # 在各个方法类别中查找
+            # 在各个方法类别中查找方法原理和优缺点
+            method_info = None
             for category in ['data_scaler_methods', 'missing_value_handling_methods', 'outlier_detection_methods']:
                 if method_name in self.method_config.get(category, {}):
                     method_info = self.method_config[category][method_name]
-                    return {
-                        "status": "success",
-                        "method": {
-                            "name": method_name,
-                            "principle": method_info.get('principle', ''),
-                            "advantages": method_info.get('advantages', []),
-                            "disadvantages": method_info.get('disadvantages', []),
-                            "applicable_scenarios": method_info.get('applicable_scenarios', [])
-                        }
-                    }
+                    break
                     
-            raise ValueError(f"Method {method_name} not found")
+            if method_info is None:
+                raise ValueError(f"Method {method_name} not found in method config")
+                
+            # 查找方法参数信息
+            parameter_info = None
+            for category in ['data_scaler_methods', 'missing_value_handling_methods', 'outlier_detection_methods']:
+                if method_name in self.parameter_config.get(category, {}):
+                    parameter_info = self.parameter_config[category][method_name]
+                    break
+                    
+            if parameter_info is None:
+                raise ValueError(f"Method {method_name} not found in parameter config")
+                
+            # 组合返回信息
+            return {
+                "status": "success",
+                "method": {
+                    "name": method_name,
+                    "description": parameter_info.get('description', ''),
+                    "principle": method_info.get('principle', ''),
+                    "advantages": method_info.get('advantages', []),
+                    "disadvantages": method_info.get('disadvantages', []),
+                    "applicable_scenarios": method_info.get('applicable_scenarios', []),
+                    "parameters": parameter_info.get('parameters', [])
+                }
+            }
             
         except Exception as e:
             self.logger.error(f"Error getting method details: {str(e)}")
diff --git a/date_feature/parameter.yaml b/date_feature/parameter.yaml
index 7155848..9bd9da1 100644
--- a/date_feature/parameter.yaml
+++ b/date_feature/parameter.yaml
@@ -1,186 +1,255 @@
 feature_engineering_methods_parameters:
 
   LabelEncoder:
-    parameters: {}
+    description: "将分类标签编码为整数。"
+    parameters: []
 
   KBinsDiscretizer:
+    description: "将连续数据分箱为离散数据。"
     parameters:
-      n_bins:
-        description: "指定每个特征要分成的箱数。可以是单个整数，表示所有特征使用相同的箱数；也可以是形状为 (n_features,) 的数组，为每个特征指定不同的箱数。"
+      - name: "n_bins"
+        type: "int or array-like"
         default: 5
-      encode:
+        description: "指定每个特征要分成的箱数。可以是单个整数，表示所有特征使用相同的箱数；也可以是形状为 (n_features,) 的数组，为每个特征指定不同的箱数。"
+      - name: "encode"
+        type: "str"
+        default: "onehot"
         description: "指定离散化后输出的编码方式。可选值包括 'onehot'（独热编码）、'onehot-dense'（密集独热编码）、'ordinal'（序数编码）。"
-        default: 'onehot'
-      strategy:
+      - name: "strategy"
+        type: "str"
+        default: "quantile"
         description: "定义分箱策略。可选值包括 'uniform'（均匀分箱）、'quantile'（分位数分箱）、'kmeans'（K-Means 聚类分箱）。"
-        default: 'quantile'
 
   FunctionTransformer:
+    description: "对数据应用自定义函数进行转换。"
     parameters:
-      func:
+      - name: "func"
+        type: "callable"
+        default: null
         description: "要应用于输入数据的函数。"
+      - name: "inverse_func"
+        type: "callable"
         default: null
-      inverse_func:
         description: "func 的逆函数，如果存在。"
-        default: null
-      validate:
-        description: "布尔值，指示是否在转换前验证输入数据。"
+      - name: "validate"
+        type: "bool"
         default: false
-      accept_sparse:
-        description: "布尔值，指示是否接受稀疏矩阵作为输入。"
+        description: "指示是否在转换前验证输入数据。"
+      - name: "accept_sparse"
+        type: "bool"
         default: false
-      check_inverse:
-        description: "布尔值，指示在适合期间是否检查 func 和 inverse_func 是否互为逆函数。"
+        description: "指示是否接受稀疏矩阵作为输入。"
+      - name: "check_inverse"
+        type: "bool"
         default: true
-      kw_args:
+        description: "指示在适合期间是否检查 func 和 inverse_func 是否互为逆函数。"
+      - name: "kw_args"
+        type: "dict"
+        default: null
         description: "传递给 func 的其他关键字参数。"
+      - name: "inv_kw_args"
+        type: "dict"
         default: null
-      inv_kw_args:
         description: "传递给 inverse_func 的其他关键字参数。"
-        default: null
 
   PowerTransformer:
+    description: "对数据进行幂变换以使其更符合正态分布。"
     parameters:
-      method:
+      - name: "method"
+        type: "str"
+        default: "yeo-johnson"
         description: "指定变换方法。可选值包括 'yeo-johnson' 和 'box-cox'。"
-        default: 'yeo-johnson'
-      standardize:
-        description: "布尔值，指示是否在变换后将数据标准化为零均值和单位方差。"
+      - name: "standardize"
+        type: "bool"
         default: true
-      copy:
-        description: "布尔值，指示是否复制输入数据，或在原地进行变换。"
+        description: "指示是否在变换后将数据标准化为零均值和单位方差。"
+      - name: "copy"
+        type: "bool"
         default: true
+        description: "指示是否复制输入数据，或在原地进行变换。"
 
   QuantileTransformer:
+    description: "将数据转换为均匀分布或正态分布。"
     parameters:
-      n_quantiles:
-        description: "用于分位数变换的分位数数量。"
+      - name: "n_quantiles"
+        type: "int"
         default: 1000
-      output_distribution:
+        description: "用于分位数变换的分位数数量。"
+      - name: "output_distribution"
+        type: "str"
+        default: "uniform"
         description: "指定输出分布。可选值包括 'uniform' 和 'normal'。"
-        default: 'uniform'
-      ignore_implicit_zeros:
-        description: "布尔值，指示是否忽略隐式零。"
+      - name: "ignore_implicit_zeros"
+        type: "bool"
         default: false
-      subsample:
+        description: "指示是否忽略隐式零。"
+      - name: "subsample"
+        type: "int"
+        default: 100000
         description: "用于计算分位数的子样本大小。"
-        default: 1e5
-      random_state:
-        description: "用于随机数生成的种子。"
+      - name: "random_state"
+        type: "int or None"
         default: null
-      copy:
-        description: "布尔值，指示是否复制输入数据，或在原地进行变换。"
+        description: "用于随机数生成的种子。"
+      - name: "copy"
+        type: "bool"
         default: true
+        description: "指示是否复制输入数据，或在原地进行变换。"
 
   FeatureHasher:
+    description: "使用哈希技巧将特征映射到向量。"
     parameters:
-      n_features:
-        description: "哈希空间的维度。"
+      - name: "n_features"
+        type: "int"
         default: 1048576
-      input_type:
+        description: "哈希空间的维度。"
+      - name: "input_type"
+        type: "str"
+        default: "dict"
         description: "输入数据的类型。可选值包括 'dict' 和 'pair'。"
-        default: 'dict'
-      dtype:
+      - name: "dtype"
+        type: "type"
+        default: "float64"
         description: "输出数据的类型。"
-        default: 'float64'
-      alternate_sign:
-        description: "布尔值，指示是否在哈希时使用交替符号。"
+      - name: "alternate_sign"
+        type: "bool"
         default: true
+        description: "指示是否在哈希时使用交替符号。"
 
   DictVectorizer:
+    description: "将符号表示的特征（如字典）转换为稀疏矩阵。"
     parameters:
-      dtype:
+      - name: "dtype"
+        type: "type"
+        default: "float64"
         description: "输出数据的类型。"
-        default: 'float64'
-      separator:
+      - name: "separator"
+        type: "str"
+        default: "="
         description: "用于分隔特征名称的分隔符。"
-        default: '='
-      sparse:
-        description: "布尔值，指示是否返回稀疏矩阵。"
+      - name: "sparse"
+        type: "bool"
         default: true
-      sort:
-        description: "布尔值，指示是否对特征名称排序。"
+        description: "指示是否返回稀疏矩阵。"
+      - name: "sort"
+        type: "bool"
         default: true
+        description: "指示是否对特征名称排序。"
+
 
   PCA:
-    parameters:
-      n_components:
-        description: "要保留的主成分数量。可以是整数、浮点数或 'mle'。"
-        default: null
-      copy:
-        description: "布尔值，指示是否复制输入数据，或在原地进行变换。"
-        default: true
-      whiten:
-        description: "布尔值，指示是否对主成分进行白化。"
-        default: false
-      svd_solver:
-        description: "用于计算 SVD 的方法。可选值包括 'auto'、'full'、'arpack' 和 'randomized'。"
-        default: 'auto'
-      tol:
-        description: "奇异值分解的容差。"
-        default: 0.0
-      iterated_power:
-        description: "用于随机化 SVD 的迭代次数。"
-        default: 'auto'
-      random_state:
-        description: "用于随机数生成的种子。"
-        default: null
+    description: "主成分分析，用于降维。"
+    parameters:
+      - name: "n_components"
+        type: "int, float, None or str"
+        default: null
+        description: "要保留的主成分数量。可以是整数、浮点数或 'mle'。"
+      - name: "copy"
+        type: "bool"
+        default: true
+        description: "指示是否复制输入数据，或在原地进行变换。"
+      - name: "whiten"
+        type: "bool"
+        default: false
+        description: "指示是否对主成分进行白化。"
+      - name: "svd_solver"
+        type: "str"
+        default: "auto"
+        description: "用于计算 SVD 的方法。可选值包括 'auto'、'full'、'arpack' 和 'randomized'。"
+      - name: "tol"
+        type: "float"
+        default: 0.0
+        description: "奇异值分解的容差。"
+      - name: "iterated_power"
+        type: "int or 'auto'"
+        default: 'auto'
+        description: "用于随机化 SVD 的迭代次数。"
+      - name: "random_state"
+        type: "int or None"
+        default: null
+        description: "用于随机数生成的种子。"
 
   SelectKBest:
+    description: "选择最重要的 K 个特征。"
     parameters:
-      score_func:
+      - name: "score_func"
+        type: "callable"
+        default: "f_classif"
         description: "用于计算特征得分的函数。"
-        default: 'f_classif'
-      k:
-        description: "要选择的特征数量。"
+      - name: "k"
+        type: "int"
         default: 10
+        description: "要选择的特征数量。"
 
   RFE:
+    description: "递归特征消除，用于选择最重要的特征。"
     parameters:
-      estimator:
+      - name: "estimator"
+        type: "object"
+        default: null
         description: "用于特征选择的基模型。"
+      - name: "n_features_to_select"
+        type: "int"
         default: null
-      n_features_to_select:
         description: "要选择的特征数量。"
-        default: null
-      step:
-        description: "每次迭代要移除的特征数量。"
+      - name: "step"
+        type: "int"
         default: 1
-      verbose:
-        description: "控制冗长模式的整数。"
+        description: "每次迭代要移除的特征数量。"
+      - name: "verbose"
+        type: "int"
         default: 0
+        description: "控制冗长模式的整数。"
 
   PolynomialFeatures:
+    description: "生成多项式特征，增加模型的非线性能力。"
     parameters:
-      degree:
-        description: "生成多项式特征的最高次数。"
+      - name: "degree"
+        type: "int"
         default: 2
-      interaction_only:
-        description: "布尔值，指示是否仅包含交互项。"
+        description: "生成多项式特征的最高次数。"
+      - name: "interaction_only"
+        type: "bool"
         default: false
-      include_bias:
-        description: "布尔值，指示是否包含偏置列。"
+        description: "指示是否仅包含交互项。"
+      - name: "include_bias"
+        type: "bool"
         default: true
-      order:
+        description: "指示是否包含偏置列。"
+      - name: "order"
+        type: "str"
+        default: "C"
         description: "输出特征的顺序。可选值包括 'C' 和 'F'。"
-        default: 'C'
 
   OneHotEncoder:
+    description: "将分类特征转换为独热编码。"
     parameters:
-      categories:
+      - name: "categories"
+        type: "str or list or 'auto'"
+        default: "auto"
         description: "指定每个特征的类别。"
-        default: 'auto'
-      drop:
-        description: "指定要从每个特征中删除的类别。"
+      - name: "drop"
+        type: "str or array-like"
         default: null
-      sparse:
-        description: "布尔值，指示是否返回稀疏矩阵。"
+        description: "指定要从每个特征中删除的类别。"
+      - name: "sparse"
+        type: "bool"
         default: true
-      dtype:
+        description: "指示是否返回稀疏矩阵。"
+      - name: "dtype"
+        type: "type"
+        default: "float64"
         description: "输出数据的类型。"
-        default: 'float64'
-      handle_unknown:
-        description: "指定如何处理未知类别。可选值"
+      - name: "handle_unknown"
+        type: "str"
+        default: "error"
+        description: "指定如何处理未知类别。可选值包括 'error'（抛出异常）、'ignore'（忽略）。"
+      - name: "max_categories"
+        type: "int or None"
+        default: null
+        description: "在类别过多时，将类别限制为最大类别数量。"
 
-      
+  
 
+
+  
diff --git a/date_feature/parameter_new.yaml b/date_feature/parameter_new.yaml
deleted file mode 100644
index 9bd9da1..0000000
--- a/date_feature/parameter_new.yaml
+++ /dev/null
@@ -1,255 +0,0 @@
-feature_engineering_methods_parameters:
-
-  LabelEncoder:
-    description: "将分类标签编码为整数。"
-    parameters: []
-
-  KBinsDiscretizer:
-    description: "将连续数据分箱为离散数据。"
-    parameters:
-      - name: "n_bins"
-        type: "int or array-like"
-        default: 5
-        description: "指定每个特征要分成的箱数。可以是单个整数，表示所有特征使用相同的箱数；也可以是形状为 (n_features,) 的数组，为每个特征指定不同的箱数。"
-      - name: "encode"
-        type: "str"
-        default: "onehot"
-        description: "指定离散化后输出的编码方式。可选值包括 'onehot'（独热编码）、'onehot-dense'（密集独热编码）、'ordinal'（序数编码）。"
-      - name: "strategy"
-        type: "str"
-        default: "quantile"
-        description: "定义分箱策略。可选值包括 'uniform'（均匀分箱）、'quantile'（分位数分箱）、'kmeans'（K-Means 聚类分箱）。"
-
-  FunctionTransformer:
-    description: "对数据应用自定义函数进行转换。"
-    parameters:
-      - name: "func"
-        type: "callable"
-        default: null
-        description: "要应用于输入数据的函数。"
-      - name: "inverse_func"
-        type: "callable"
-        default: null
-        description: "func 的逆函数，如果存在。"
-      - name: "validate"
-        type: "bool"
-        default: false
-        description: "指示是否在转换前验证输入数据。"
-      - name: "accept_sparse"
-        type: "bool"
-        default: false
-        description: "指示是否接受稀疏矩阵作为输入。"
-      - name: "check_inverse"
-        type: "bool"
-        default: true
-        description: "指示在适合期间是否检查 func 和 inverse_func 是否互为逆函数。"
-      - name: "kw_args"
-        type: "dict"
-        default: null
-        description: "传递给 func 的其他关键字参数。"
-      - name: "inv_kw_args"
-        type: "dict"
-        default: null
-        description: "传递给 inverse_func 的其他关键字参数。"
-
-  PowerTransformer:
-    description: "对数据进行幂变换以使其更符合正态分布。"
-    parameters:
-      - name: "method"
-        type: "str"
-        default: "yeo-johnson"
-        description: "指定变换方法。可选值包括 'yeo-johnson' 和 'box-cox'。"
-      - name: "standardize"
-        type: "bool"
-        default: true
-        description: "指示是否在变换后将数据标准化为零均值和单位方差。"
-      - name: "copy"
-        type: "bool"
-        default: true
-        description: "指示是否复制输入数据，或在原地进行变换。"
-
-  QuantileTransformer:
-    description: "将数据转换为均匀分布或正态分布。"
-    parameters:
-      - name: "n_quantiles"
-        type: "int"
-        default: 1000
-        description: "用于分位数变换的分位数数量。"
-      - name: "output_distribution"
-        type: "str"
-        default: "uniform"
-        description: "指定输出分布。可选值包括 'uniform' 和 'normal'。"
-      - name: "ignore_implicit_zeros"
-        type: "bool"
-        default: false
-        description: "指示是否忽略隐式零。"
-      - name: "subsample"
-        type: "int"
-        default: 100000
-        description: "用于计算分位数的子样本大小。"
-      - name: "random_state"
-        type: "int or None"
-        default: null
-        description: "用于随机数生成的种子。"
-      - name: "copy"
-        type: "bool"
-        default: true
-        description: "指示是否复制输入数据，或在原地进行变换。"
-
-  FeatureHasher:
-    description: "使用哈希技巧将特征映射到向量。"
-    parameters:
-      - name: "n_features"
-        type: "int"
-        default: 1048576
-        description: "哈希空间的维度。"
-      - name: "input_type"
-        type: "str"
-        default: "dict"
-        description: "输入数据的类型。可选值包括 'dict' 和 'pair'。"
-      - name: "dtype"
-        type: "type"
-        default: "float64"
-        description: "输出数据的类型。"
-      - name: "alternate_sign"
-        type: "bool"
-        default: true
-        description: "指示是否在哈希时使用交替符号。"
-
-  DictVectorizer:
-    description: "将符号表示的特征（如字典）转换为稀疏矩阵。"
-    parameters:
-      - name: "dtype"
-        type: "type"
-        default: "float64"
-        description: "输出数据的类型。"
-      - name: "separator"
-        type: "str"
-        default: "="
-        description: "用于分隔特征名称的分隔符。"
-      - name: "sparse"
-        type: "bool"
-        default: true
-        description: "指示是否返回稀疏矩阵。"
-      - name: "sort"
-        type: "bool"
-        default: true
-        description: "指示是否对特征名称排序。"
-
-
-  PCA:
-  description: "主成分分析，用于降维。"
-  parameters:
-    - name: "n_components"
-      type: "int, float, None or str"
-      default: null
-      description: "要保留的主成分数量。可以是整数、浮点数或 'mle'。"
-    - name: "copy"
-      type: "bool"
-      default: true
-      description: "指示是否复制输入数据，或在原地进行变换。"
-    - name: "whiten"
-      type: "bool"
-      default: false
-      description: "指示是否对主成分进行白化。"
-    - name: "svd_solver"
-      type: "str"
-      default: "auto"
-      description: "用于计算 SVD 的方法。可选值包括 'auto'、'full'、'arpack' 和 'randomized'。"
-    - name: "tol"
-      type: "float"
-      default: 0.0
-      description: "奇异值分解的容差。"
-    - name: "iterated_power"
-      type: "int or 'auto'"
-      default: 'auto'
-      description: "用于随机化 SVD 的迭代次数。"
-    - name: "random_state"
-      type: "int or None"
-      default: null
-      description: "用于随机数生成的种子。"
-
-  SelectKBest:
-    description: "选择最重要的 K 个特征。"
-    parameters:
-      - name: "score_func"
-        type: "callable"
-        default: "f_classif"
-        description: "用于计算特征得分的函数。"
-      - name: "k"
-        type: "int"
-        default: 10
-        description: "要选择的特征数量。"
-
-  RFE:
-    description: "递归特征消除，用于选择最重要的特征。"
-    parameters:
-      - name: "estimator"
-        type: "object"
-        default: null
-        description: "用于特征选择的基模型。"
-      - name: "n_features_to_select"
-        type: "int"
-        default: null
-        description: "要选择的特征数量。"
-      - name: "step"
-        type: "int"
-        default: 1
-        description: "每次迭代要移除的特征数量。"
-      - name: "verbose"
-        type: "int"
-        default: 0
-        description: "控制冗长模式的整数。"
-
-  PolynomialFeatures:
-    description: "生成多项式特征，增加模型的非线性能力。"
-    parameters:
-      - name: "degree"
-        type: "int"
-        default: 2
-        description: "生成多项式特征的最高次数。"
-      - name: "interaction_only"
-        type: "bool"
-        default: false
-        description: "指示是否仅包含交互项。"
-      - name: "include_bias"
-        type: "bool"
-        default: true
-        description: "指示是否包含偏置列。"
-      - name: "order"
-        type: "str"
-        default: "C"
-        description: "输出特征的顺序。可选值包括 'C' 和 'F'。"
-
-  OneHotEncoder:
-    description: "将分类特征转换为独热编码。"
-    parameters:
-      - name: "categories"
-        type: "str or list or 'auto'"
-        default: "auto"
-        description: "指定每个特征的类别。"
-      - name: "drop"
-        type: "str or array-like"
-        default: null
-        description: "指定要从每个特征中删除的类别。"
-      - name: "sparse"
-        type: "bool"
-        default: true
-        description: "指示是否返回稀疏矩阵。"
-      - name: "dtype"
-        type: "type"
-        default: "float64"
-        description: "输出数据的类型。"
-      - name: "handle_unknown"
-        type: "str"
-        default: "error"
-        description: "指定如何处理未知类别。可选值包括 'error'（抛出异常）、'ignore'（忽略）。"
-      - name: "max_categories"
-        type: "int or None"
-        default: null
-        description: "在类别过多时，将类别限制为最大类别数量。"
-
-  
-
-
-  
diff --git a/example_method_reader.py b/example_method_reader.py
new file mode 100644
index 0000000..4f11c7c
--- /dev/null
+++ b/example_method_reader.py
@@ -0,0 +1,14 @@
+from data_process.method_reader_date_process import MethodReader
+
+# 创建方法读取器实例
+reader = MethodReader()
+
+# 获取所有预处理方法
+methods = reader.get_preprocessing_methods()
+print("预处理方法列表:")
+print(methods)
+
+# 获取特定方法的详细信息
+method_details = reader.get_method_details('StandardScaler')
+print("\nStandardScaler方法详情:")
+print(method_details) 
\ No newline at end of file
diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml
new file mode 100644
index 0000000..ee51c2d
--- /dev/null
+++ b/mlruns/0/meta.yaml
@@ -0,0 +1,6 @@
+artifact_location: mlflow-artifacts:/0
+creation_time: 1739520200398
+experiment_id: '0'
+last_update_time: 1739520200398
+lifecycle_stage: active
+name: Default
diff --git a/tests/test_method_reader.py b/tests/test_method_reader.py
new file mode 100644
index 0000000..5cb3f51
--- /dev/null
+++ b/tests/test_method_reader.py
@@ -0,0 +1,49 @@
+import unittest
+from data_process.method_reader_date_process import MethodReader
+
+class TestMethodReader(unittest.TestCase):
+    def setUp(self):
+        self.reader = MethodReader()
+        
+    def test_get_preprocessing_methods(self):
+        result = self.reader.get_preprocessing_methods()
+        self.assertEqual(result['status'], 'success')
+        self.assertIsInstance(result['methods'], list)
+        
+        # 检查返回的方法列表
+        methods = result['methods']
+        self.assertTrue(any(m['name'] == 'data_scaler' for m in methods))
+        self.assertTrue(any(m['name'] == 'missing_value_handler' for m in methods))
+        self.assertTrue(any(m['name'] == 'outlier_detector' for m in methods))
+        
+    def test_get_method_details(self):
+        # 测试获取StandardScaler的详细信息
+        result = self.reader.get_method_details('StandardScaler')
+        self.assertEqual(result['status'], 'success')
+        self.assertEqual(result['method']['name'], 'StandardScaler')
+        
+        # 检查返回的详细信息字段
+        method = result['method']
+        self.assertIn('description', method)
+        self.assertIn('principle', method)
+        self.assertIn('advantages', method)
+        self.assertIn('disadvantages', method)
+        self.assertIn('applicable_scenarios', method)
+        self.assertIn('parameters', method)
+        
+        # 检查参数信息
+        parameters = method['parameters']
+        self.assertIsInstance(parameters, list)
+        if parameters:
+            param = parameters[0]
+            self.assertIn('name', param)
+            self.assertIn('type', param)
+            self.assertIn('default', param)
+            self.assertIn('description', param)
+        
+        # 测试获取不存在的方法
+        result = self.reader.get_method_details('NonExistentMethod')
+        self.assertEqual(result['status'], 'error')
+        
+if __name__ == '__main__':
+    unittest.main() 
\ No newline at end of file