From e43ac40027eb946145f672e04741193887631a73 Mon Sep 17 00:00:00 2001 From: haotian <2421912570@qq.com> Date: Tue, 25 Feb 2025 11:17:54 +0800 Subject: [PATCH] =?UTF-8?q?=E7=94=9F=E6=88=90--=E5=89=8D=E7=AB=AF=E8=AE=BE?= =?UTF-8?q?=E8=AE=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_process/__init__.py | 0 .../__pycache__/__init__.cpython-39.pyc | Bin 151 -> 0 bytes .../__pycache__/data_cleaner.cpython-39.pyc | Bin 2919 -> 0 bytes .../__pycache__/data_processor.cpython-39.pyc | Bin 2492 -> 0 bytes .../data_processor_date.cpython-39.pyc | Bin 8675 -> 0 bytes .../__pycache__/data_splitter.cpython-39.pyc | Bin 1713 -> 0 bytes .../feature_engineer.cpython-39.pyc | Bin 3098 -> 0 bytes .../__pycache__/method_reader.cpython-39.pyc | Bin 2955 -> 0 bytes .../method_reader_date_feature.cpython-39.pyc | Bin 3498 -> 0 bytes .../method_reader_date_process.cpython-39.pyc | Bin 3757 -> 0 bytes data_process/data_cleaner.py | 66 -------- data_process/data_processor.py | 63 -------- data_process/data_splitter.py | 49 ------ data_process/feature_engineer.py | 77 --------- data_process/test_data_processor.py | 34 ---- data_process/ttt.py | 2 - doc/接口文档code.md | 149 ++++++++++++++++++ test_data_processor.py | 93 ----------- test_method_reader.py | 49 ------ test_model_manager.py | 99 ------------ test_model_trainer.py | 85 ---------- test_system_monitor.py | 86 ---------- 处理乳腺癌数据集.py | 6 - 23 files changed, 149 insertions(+), 709 deletions(-) delete mode 100644 data_process/__init__.py delete mode 100644 data_process/__pycache__/__init__.cpython-39.pyc delete mode 100644 data_process/__pycache__/data_cleaner.cpython-39.pyc delete mode 100644 data_process/__pycache__/data_processor.cpython-39.pyc delete mode 100644 data_process/__pycache__/data_processor_date.cpython-39.pyc delete mode 100644 data_process/__pycache__/data_splitter.cpython-39.pyc delete mode 100644 data_process/__pycache__/feature_engineer.cpython-39.pyc delete mode 100644 data_process/__pycache__/method_reader.cpython-39.pyc delete mode 100644 data_process/__pycache__/method_reader_date_feature.cpython-39.pyc delete mode 100644 data_process/__pycache__/method_reader_date_process.cpython-39.pyc delete mode 100644 data_process/data_cleaner.py delete mode 100644 data_process/data_processor.py delete mode 100644 data_process/data_splitter.py delete mode 100644 data_process/feature_engineer.py delete mode 100644 data_process/test_data_processor.py delete mode 100644 data_process/ttt.py delete mode 100644 test_data_processor.py delete mode 100644 test_method_reader.py delete mode 100644 test_model_manager.py delete mode 100644 test_model_trainer.py delete mode 100644 test_system_monitor.py delete mode 100644 处理乳腺癌数据集.py diff --git a/data_process/__init__.py b/data_process/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/data_process/__pycache__/__init__.cpython-39.pyc b/data_process/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 8fb5816cb4c0b4a7e9253c38089fb65695737abe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 151 zcmYe~<>g`kg6liiri19mAOaaM0yz#qT+9L_QW%06G#UL?G8BP?5yUSW{fzwFRQ<%1 z+{`@PqWt_4{fxx?lFY|Zc}ULWMuIZyck3WOnfAC@Xvf<%1xFrL_2%{;CS=pEd6g*5vedBj~fnRWfWHQTWu5>h6_D zMem3r=28~o2wsxSp{4CBNvyQ}b|Mu{x|(E@l)sftO2q}d=)Wb4G*U^CzgY-w<~UMC zO2+75`EutEvVsmL0c9kx7=@Hpxy>5)=v^|R%x2D=E~IycyW1Z3d6WC=1pCapXPfYy zpvBhA=*d{?UW2t+=Z+J!+1dHZj#<@*RV(Z%80p?6HVJx=^3%GFxxSwADCeT=Lp+DS z|Lfu2Uk)Gs^6+nec>L+VHX$=NF7i<_j;*C6u#b5(;Fd3gQRFt6(TY9*h!kYQtUU@X zz+_r`-U3{El)3lpii8bDAO>$!+ZlWxKIBU!BE`o$+RZpiqFi^gLaJ~i_jfiDQ{vA;iYGW%Z6a=;LpKg9K3UYzWRRq)z@TcRPfWBJtl{ySY;+VzXakzz)WfyG9cKSrp%xJb(^m)aFv zl_HPvSS&8g3?qLXZutqA=~mTXv?Ss}MYoBf+t%3Nu-({gR8~c{?Hg45KsgMaJ|g$6 z+w>O2x0SU{cJ0cZxfQJ%>*N98ST$ZDFy_sC2IC(Q=G<=}wiSIBP_v0@?(9>t>rgWN zs+refu4-4#ZHk_8<}LCr`8gciUFW8gx9av1xdd+zbwjp;{X36;_p8T`{&}$f>A?q& zHUR@K$>X$Se7g429H5*OLUA^zx8w3)`qI@h#&bC;({yLB73D0&;Tqk6L01k+gnIC~ zH(xe={tY1tQP0MaFHXNa79iZ@eG#32K3Mz(XN)1E@W8#TxorM{^Dhx(+-L64Ve) zx%djcHRc)U>9ci$;TPB|&%%(zabj7(s8y>)dvLE>Yt*%R^l3VMVVPfF>SNe2VQ5#D z*Cbcs0(gt@FfT3#I>+Qfp_*PsQUQ_ZHm%@*@6lZ(k2QlM4zvT>;5fF=5TJ{FiZ1rf z4WNrXr;9ann4{dveu6IUu6@%6x|r6`#XX_RuV+Ue?jPOVJJ{a?x(uoKHU{SZglEMK zpn<>yru_jcCutIck_<%g3pqVs!!K|a!hbw0FAp|{9!SPkG~v3>L@^0Z2F2IFoA^4K zZ=m@mnjx5>vxFl?pnHW##z`Kfp>ZW%!l|dQe9C;EFMR+V$`vq(qf0yVMX(0HXP5DN zVx4xUGKLy+gz1739`i{z)fSYVts-SLpv!9E!{lQ?KZA$qZxP1pmUvl#aw`B50JIg^ zc52ml09uZ!P&KaG+b$@%4UjweMOfv)s*&=T%RJ`qS-WoK&ivD>S@XKJ>i|WXvleSD zw1Ksansn|vwVG5;pR9vY7)|Ny{2NH(%}LUnby-*SD(?xB^me_Q9+1Seh9p?c`gH;O z(+7ut|INXpTL%vx0!hSoAyx4#nt6G*qxTaTtR#za9l`;IdOiN*hj{(thewDWl!h)8 z8x?{2o1(5eaMoFog;>-1(f{C@Hm`66q5;gCt_+G&r3n`c^%2kG7N4uB$ILwB-eMLq zuPA|+nMes^BpW1gnq=j42%pb8XJ(FB9H#FOGv+gfWrcjiLdydSRU z9$TvSg%J2JNV^jtMkEE6VeLTCfYNV(b)l93=5@drm+r~y@b^IE7w3|X6+Bp3yyUs$ q*|n6{m^(5~z>^^chO`v#NrCWRsdwvD6rf*D*z1qtp5W*X9siu diff --git a/data_process/__pycache__/data_processor.cpython-39.pyc b/data_process/__pycache__/data_processor.cpython-39.pyc deleted file mode 100644 index 8a8485f39361e1a1018d9b71fa2c51a1e02ed84e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2492 zcmZuz-HRMW5by5!*xAoq?kkEK*Qnse683qGh-J-6}hNW)ZjSJm{?$FHg;`(k<@hgevdldDoxSG`b zT2lAxNyBd#o*g%nmftdKC*GAz_!DML2uyUP4oP8Uh1`WM(m7CN@JUyZs-+b5zzY3EEDWvTW~syC2@#_;T&u$M0`^@zcHUe_jCd z+-{ckqNOeajq$)Wrt}y*#XcZIn!6Dhl7J{WV#->lu(pR}cxBCqc{dwz`d$(`hz4q0{ooj>V zpMEARx|sA%(LUII`WbY7a~X?^BHpr3AVc92NFfA{5o#A53)Mo$k}Wt|*UyJV)IDYr zrR_Hsj-TpKIRz3ErjpFy7SoK20*aInW>cH8e+G~4aCK+ktGy@|K_04Q?Mg_BOt+!I zfupeeO%#5E#o+xR9YK+=Llhn+Bf3s5vmxWw1tt$Gwqgxwk0M%D?2*Imh^(_O=|}Xk zmA(jC4rnbXHgbpXKJfMd&jsFj;8iwxE#>jbkbO>Wu&eY6U~Q#3s&Q{fk+V)I8CvH6 zb2d@+)n6&OY*R9L%4jusRW*n9)%%)P$T{*B;kC>5yLMVD=R@QWa3FJalevu#Mt|PE zT~>#tbASB{LtEYAqF)07agkwC=nBkl_XYHhW!`&Xu;)x#^z%HEO7J$uudP;dakxF$ zyV&ny(e?UqyxNYlkb?qZwB3_g(jH8|CS@j#0BXcY4!0LNE$x_w0T9UEf%0HE*Paw1 z55R`9t}G}g-7vhdEu{t+q-$rxxG&7vayR5i+fmxfboI4MU18drb_9M{TZNJxr$o;q zs90tnt=9|=qGAWAbQPTl(1k}qzd%A5qm$I5d*Nx|!&+>Tx~xTK>0r-xxZQud!{j`? zD)FmbY%k-n4Pf5_cd#;KI{;=4Ed#IvL%M=sgU*NrnC=HY!sjSJ(i$3Yq_e=Q0Pig5 z+Mw(4D(V72oRQ0GC9n`i=SX@4KvY@%dJBl0k;m(*`T!6$HbG=&1EL0isA)j-$DJD+ zH$N*uBp(AG|xQKD-wkCA~qz=gqr+>ffyA~}HMAdoQx?q@{Kq3942>~#M(6sG(zmico`MgxIAjJ3CF8UPD|u>_GmZ9 zTfQg{b<%zUHw8;`h~#PxQ7#QYh)Qh#5&`PZ6c=I<%5*N5A}_BPRK+VrR*+!N^;=^t z_%pztf+{lijG5rLh~eUk1zZkUD$%;K#aEWYVmXe^%Q~>My;8uB3Cjv&@PjN#~VH$GMFJorVt~yyaXXe^PHTMd<=-lJpYu+nx+4-FLxkn||$4VC@Rywbk_c8f|G|_*ZPH$>Li?su_ zY8Y!rYe5*NPB!X}9UG6=!^ewWu;5(Sr_YV13 zd5EeU7XOH21;L3YkD)kl)W^9$-UywV%@0|$X4IArI!+CDRXym<*^XbgsV?MJ&8vi% zrV`YhT3Dr35PKIoasf#kxSn2|#h3PDPK{9najlW@Q>!dc+ z(9c-ZS%zgVXlBOF&gN{5Aj>VPEYAw4&EHU4WPPYDuwhYOw2OA1z-TOoPNs&~MLU~CE)nysD7D-E~Wl5GoJxqs2m~a5x+ZN5ipjUAR8n5N-@Ng`2}IOj*pMz06dsSZj=d)XKu^D}Vis zm5bl&UF*sZ-kd1#b!g`632Y#+k-#Pbn+a?oP$rNjKsUuR1W5bw9Kb}L=LsJmP#{2? z70j~?A~+IQ_e7ISOfZtnZxp3{~e)+}$&v7^T!g`{})FtA;vz0x%WpI6XkPuAz@ zylP%8tW@ZXgZ%)Kl9ltaky2#wS8jOA%6}LmsH5L%j|Bfsb7T#xPj)7+!k~43Wae}{ zf~lVY62jD-ms@fq&&o_;>LnShtSqS!ggDe@HDYd-x}q`pb#;TZEVqz6!N*B+D%508GEDeS>8pLiNzWv^{U%h?xwKrD&>9wXd34S;Uu`}#*uo@Nr z48*nPyyyZxDvUop?vAs{_~G$m<0qQ;jqk7mD0kPM2+F(5<2#7c+NfhgdG(ZC$htLy z)+sPM?KV!iXIPE%?ZD04A*lYtutj8r47v-NHC8?USu+oIg$8x45E?knbWwgMjm%n% zwFjC8dLAXZv36wY(Bl&--${f>EGi&n=BGiWaylU+_$C3n(2L&%a0pL85+*5zoL4k? zL=Rq2#EKaz7&7lKD3sKLuhb7v$kfm4>jBkLq!P<{;RyrqY71l2z(Mf#`& zK1xUFS%szW*DopH4LMY2)g^6ND9K2Tv?w(LzF5>-2KYt?7a5U!MS5LuPe^rrHu}*WXmzALVBIqdvBQZ4_siN2$~;6;SH$l){0i@4O6H60rZg%r-5m zIK9pLCCt60XYR6C$3Qq34X``dR<`Yu`pVW8jMD6IWJH5(JLVj}q{-4#61!8AH0iu5 zOWwdexcxxe3j;_0r}4D!?XD=@xi_|hO@QXkMfEA^1V)A3iEw9H&>?Q7%?S$OTk19JL^9V3jmS)UgiSuKJu+ zD$Ug7Y=F-4v&~c^oZkI_Fv=8Q;gqhE2aNO3iHe80Y%f4uRDm6?)ux)e$V8P_ow0J& zrywS@*<|?%J7_pzP!6m)o0UVq9H)aum3;8#owprUZg-h(IL`cp8tb^quo1-RwmpxF zYnR_#e~@#Z_b!2M5T6Q#F<*)ljCNE z!q&%H&71aPrOr%)+u#ixf2>>ey6rJ>!SP51C1<|l<8=-0aJ#;9{|RT1Crq>jk70$e z3b#2nf{;%W)8eXqK|)1MtRWD?1&W)|Si`NynSaTyCO^?VW`Cer}6|;YA=%5DI3#H#R zHyyx+k9Xq4$quKrR|)s7h9!z)&b8OltJR5G3C<>wJW&Pov?*J%lnrS@a_n{q1^sZ5)b_*dX=DC7>Y)ETv%brpJ9;GFvvSauN)jNtb%32 ze3TZ(SvX72!~DY(XIts#(jMF|dJ9vWg(=Q8DCAuE#doiL_szBi{?UcDK^9hzA4I!Q zg3Zle^v=Me_8^@-0b4ndhJ9ry&RYeoe~j^OLCIHrr{Q`5KT3zW=^+SO4SM8OQI-t1o;A)Z{QWR9n_ZA!l3tRh z6^5wcf%R#fSogCN-n59Lrx)TJmMt+lqA+=A_Fq zKPBjh)xm2|FDSD`Ry?DiW$R1Iw2XvjmqyF*imZgPl+bJ;%`=;g((s$x)PEmk?{h7>Z(8OT5-qyO9O1^7D{9g ze*HAmNVcs;`j>{-uuvl-;c%3_NsWxQvd?9qM#NjFkx{6Tu_VPM)X2?_?5YkC^48qm zQ6z1HTJCYER@I5plLDfA!!45@l++5yX3ijp`SK)KL|UM<2N*O|7QjsF@qjf4kmNwP z!Vua*PpjWj&u%`eVd2Vk@(X0ScUG2QWtUB z7U^oXD&ui>zk{p?c}ou>oRa<%-K6IIw-n;FPQ>JXhmyF#gFren@Us{_At$nIkmNB%sJHA^7hQp0lsPcQ-?mQbWb6iy!2@moql&P)>p&rVM8Erqwx znUlwGqwXYPeH~)3*rU{^o4NdS)EO)$+2$^FqjLp<8g5mP|LSeXt1A@G&hSEsWj-VIe@QRsH%rpCdF|!)FNi z=w8*16|Wwr>GKY^=7kyM4H`oB+te8M>mtRD&lRzr2!xOu!9%w)L3WnT5`ck^38%S+CNr2v#jFxI^sgYqUtgvm!57LGeR80X0g<$&tfZ?Al*) zJ`FgPl%pkAbAaq{it=@+gDuX>LrM{&$i1W%DpP${^e%Pkw4x|u4$)^oAxmB&OTO(> zC;@&1ON09)T#VI#Q8=X}P@Nyy_pVd=CJN2z5X?S4wLWW95s}||q z*0jjb;X?=Vz-r*+rG&mm78q_O*}~LxC;y|(|44>KgO%70Ya%bDSUS|n zqIEJxZ8y_sWuD8xK8Uxl4+hk1Mtr47at&=a^U90YNliigXvc9Ls+ak0g#tvIlh$jF z($OKXDvK}+8RC-^5$`KE)4R%W_kb9?$|n&L16!BKg6J7`(g7lDoSI1UJ1|`jPyQtp zZ}9T6J&f7C0<)NeQxu~!ik=*dH2bMYDUpiBX7Pm+Vm(VUo*{565AYc@+z;6fD8V76 zyP1B3FWs={&WW_$Z1Zbot#%rG8Ct#kx&Ua@gskR zz^a=oFd=PQ(Lf3CUC{TTV1Rl~KqI)#b zqS@c6@$A~n=~F%r#HU7cpi^J<=kS%DYymG5_0diPn9GI)boV91%SIP*lec1w*w~+N gPx# diff --git a/data_process/__pycache__/data_splitter.cpython-39.pyc b/data_process/__pycache__/data_splitter.cpython-39.pyc deleted file mode 100644 index f97444e2e1a92627bf04ebcfea8118b02b8a54c7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1713 zcmZ`)OK%)S5bmD$jCYL*w!xe>5-3`v*gjh*fN(%UU`2{W#0Y6LneJVWGmoXa7va?y z5o{%qNG_Ig04Gv71}O;~IE6pJg%hW~Mv3j8Kok&FGmf)Cp+{X?UGq(Mb=6n3al0K5 z7(2h+>8B1M4{$L)ELgk?Q!ayGgwc%TG^CU;gPEBTn(%97R&IxO?u1Tlw=*~QLXVOU z33HgcO_K)XAD_u$)hk85z`*Bd z&;+mvzjN>wV7rbsSWi1D7JaU?-()2n zaqWE>XOTp$CE|jWc_dY=xON0rLs2Bat)npk8lL_>O!+5>(T*Y&t<1^>Y0Qyz%IF0{ zbedIRAlI&0pBtObrVB`?vHkq)9LFWE3dy%j}Y4CCzZjT=R*Orl6&6Rzu3 z8v~|&Jo5tqUyBaB&~`5?W2McsP{Ky!n`mtxwu^*k8Eo+^40u_Au@Ei@OpAOSghwr7 z+9ed8$A)1Y2Em}m{;F}^Ku=;WpZ1ed2<#i$89=qiQebz0zOXTXq&NPeE8(m1LdIiJ p%)=EqGzgvn$sxrx9qNt8JbRWU@9icer|_>rVgqbZk518+@h{Sk#@7G< diff --git a/data_process/__pycache__/feature_engineer.cpython-39.pyc b/data_process/__pycache__/feature_engineer.cpython-39.pyc deleted file mode 100644 index 98c7c740996c44b147a977bffcd9150250fd5b5a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3098 zcmZ{m&5s;M6~L?dyL&!%z1}3d!E%PgBpDFiC?FwOQ9xuJ4#ZZ(3KII#>P*$l&bX(0 zsICFesKxw-mhL&+-L*@uHYYk>d87GpJQY7vY_!jc=CNH7-2LaLmE;_n8D1%2u=OALL1&z z;tbu;9eSas`|ZRZ24OI)g|%TltZSK*G=|NvIa~|ZhU?)vB|jj{W!`PVJWdbH@EkMu zNZUWAFkPFf#%?U*H$~Rrd7g=55R&U{Ls`51j#Sng{am(9)!dVD%3{IxI&s2UKI7yzKkN_z|aC6COjyVo9l4o7_Xcs zUC{vPeX9JZ^mY{8C;!>q+0TZ2CuT7GH$|4oo&7kI{W#sZ_Qsn@EW4Q)?l6EL8kH36 zbW2A>yz<25(L|hsk!$ee8fIEIEDHC3r(0*dc6n)Q6SUiwGFTV>jLZFuDYug)<6)XB zM{sa;+CiT!F8ntf{55#;kD!?D7bc@)A}$nkKqMU)J<5zbbYRZR!YIhV`Z*QfmNtXd zU2@Mjpzl(Y7REN2S%o!s3R;-kGa}Rq)rbY5Rzjo+P1brIWiVKF10m^ z-Vk4bzK#vW;zDqjX#EQs)88Ue1p-CyKu7@)gvJ5jj2niyEU7dKNIrLrnVVh$sRhzq zkeW+rQ$k8A%q4j4+%D|GIWlHmVGo?box4Y5o6Nl|q5dC?Bd9qmqkAdjVf?ENAsNWhyA3zLNKs^5C`wu_3-=<~kwZo|R2K>E1 ze@vfWg|doHs{{x_euAf8W^-V2>MyaJK3!p`fkQ$U@x9Wz-dRc_`Yo752W4QVk&flcH}cI)QAe z4a!6qyd9Nw&cbtx+LA)?0)Bml)U{G=sf1JJ7ok8@o76Kr0Mr9qn?@ag-JJOsoM+ z^xmq}s-(s#sYV=h%~Mj$kT`;MS~_=`&726u2D~oZy=1f171^*geYRw`gKCKG zV|vJNLm4BcYBGaK%c#Lt2FDq+NZHyTWr7vTt;~U}390%7;Ir&nE9`y?jQ*bpZfT?V za?T}-ufiA0w7CIkvb+t7ej3kQuf20SAbMBsBa zPOx5&qMwc9r0Q{_h-Do(iy*7|SRlRwHRbQZ5#n_LpX;S6FgW5x>@oYP6fa}*yVz`0 zhm#IZ5*X%W4Q`?eZW#hMQ#|14!-hvKW7E5M{=$})w>MN1BCP_$r>Yb5yHC{_yBJ-K3kwmwI`dWkBca@MI@IaKdR!Jbbg5c9_RHhG)=X-1JO6 zZ+RBHjo9uuoSC-J4hUu3aC$d13s{`%OBt5p3gLo`5I24}>H` zD%ptU#5Zy>G)Dwlmi|K4`!pzwU%fQ`#p~lYE)>T1&2BVo<-}>b9VKnA(hlWGXoo_% zzMll0(D#*%voLaxAEyELJ9@W%Gfi4iTP@CYF>YBHx2_Pe06zcujrGm66Rrom6D3DQ zn#%RfAeB*&te-mhR2;}wDmv>tkb&P7X*0~S_0pWs!C33|aUW3fjMozhb zSUGRJdEXY3=FrL^x?9E>!flh7WE+r{tbB!J8^;LM_tPMA@|-<%xHF>A2Hn%3Dc$~M zO14Z&1`q4`3U_5SH@CGn)I3Mdkf*`!mieNYREzg9atuZQAC=f2ePiH7+5{oI{H-z zo*O33l!J|p!GouJO+1lSFOK_lJo%8c9!`y^+cj=vl8+X4(xCW#UG?N*1(2jWFTjGBZkAsj58rLNn~jC{2_VqBmt^QZy`4 z10Ance3jn?sw&#@>=KkT)bK0BB4~;V2*@ZTI}Cy_MqRj<*b4mufXQY{^e~-x;A%mz~CLa?u@RPo8mRCTj7AHsV=VpdKWIT5`R|zZ-74w@OP}l=c06T>l6I( zrJ~Q5agB{wT-mZ-v`&|Ajl~tf;(^|0;6wL*^n1uix8BKIUWyU}ffq+s5tXJmu!+xghpbHG);!~)Kw9xuu69>^w0uS;IlBiX1JfBLYv8jW0_BaTnj%ggGCfp3~f4k2A>w(5?*8+OM zI{WtEkyA6JZtZ`%gV@%Jjp`aC@F1HE-Iv!~LjMa44hEbaC*S=ZPz_4pb5IO(hD13_ zgpK{2t&*Isk}W9IpkWMkdLJ>}vf7}jTlRj-!4`Kf>VnN6wIi9Hx1mf_u}tvVB?Ak^ z8IpVpQoPasbD83iVo)sjg0yE-JYOvOd}*Iz%b4zgV*0HqwhRT{1q13fg(f6j{z!Gs+ zBp*a*l?W>La1u;pxB-ALbLm0I{&3N1asSJ_y;Qpw>p%h22a0bjYr%h8+h|mUgPW@Q zekbKUh@-C-eE<1g5SPDAj_>od>HA^{^aP?;S0T~B7BW~oiUR+WW`wR*2ye8WVLouF z$$&x!mcA@-yipPOi>Peb??RF(4CAk$VreF@Dk%r+VH`cH>#trk<3RgEFUtK|vGd3A NXR>s)q6^~!`wv$0M$-TQ diff --git a/data_process/__pycache__/method_reader_date_feature.cpython-39.pyc b/data_process/__pycache__/method_reader_date_feature.cpython-39.pyc deleted file mode 100644 index 20dfd1fba2892c0e702c69048fb1fbf7d5e60f48..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3498 zcmds3?T;J98K2pmeX)J#I4?wmE5S4&#I3kHD3uV3s!E0Q%f*Ta0>A;f9nrfhAYK~2a8)6Z>=w0LKai@ZEmes6e~kK}67l8oClskspP5}dc6#&=s4YMH z%+Aa+ViCb7y7i~lKMo`Gm2Av@bZGn#e(`xQ6j2-?M?SRxw>3v=>yD1I z_rxKp-*60g>Ves|91Ei#BTA^Tji|AuIX1eELo4}3(TX8cOQUd_ICcZ_xXo+e)0b8o1g7pzcIeDGye6b4Lq|g z>C2t{2!8QtFbNvsL>u8ga`iqMk`V&W0MAsOrT)3reGrC@-@Z2f{k!A$cCuxr3)E}2 zu!5-7^23%>Xt87se8z?Cx}n!*t}9I03vZ6PLF7@lt@iCUqOj?=#NvGOcs8cvahRJh zJ{eZI|BXEPm<7#U#LKX|uj?!8QJbxJwC#sa@F+@F*1ah4y>R8i+Qq<2nh|fWP%rV^ z4v!ivj#u)9xjH}>x|rK!Uee`kxzm%|_d`E%U5T{V1cS5%T*T=E7iP|Vc`^#NV{!(Q z!?^=DV44h|4*U<`r0WU5aSIP{9}n>e#nnWkS|5Lmu4!9%6DN8fQ(c1Zef`Qq11%v# zqYoD}&|gN>B#59bfNofM62z3KWfB&%=PFu-Hvq9hcJ1#B_ix^uh1TBATeoifC4F%A#PY*j z?s@GXt!5`w&2G5b^aEB6qomr5x*@H4NmXF}wb4zQPdu}Wh0Vf7L;*bY^wCS*hP;|) zHwb!F2^L1H*)`?C(ubetJmRVkrp$LfTiq>j0*5S2_BxO=7KhKn+VvIBW1FRRg>6kf= zF;h6q%pn-b3t%eni;sgrnucxombD5#3qUfpGCqzcN0pIy`2c?WU!rMzWpnStKW0Le zqiJalO;fUGfI4+?A_w1$DSiYFnIDzr7?^r``XDe(15fZ&oyXJt{m)jpygmMiG*V|~ z?zaTZN8xwy{bF^$r{={%>ECpXT-)ZZSi*NI81FzMJ=a15oJfM>A!HCAjkKXYLLebp zU*k`Kj1ZYq`WoaDU6Ps_AR*>eEf)c!w+$iyqLHpnL?W^K`aKZ>fApzX$_3PAA*2JwIB+!A z``D>O-6Zfq(kTdgBZ+ud(q#PQ=J<;*#&3MN_uDu2{`_uD7euBFQKBqva6jXrnTdd~ z;!GeQ37c^OS{(~3>x$FIvYUhilt^XhaFz@GJ)MZ*Iv<8LcQyVvJd|Ir6$A-DEx0^( zp~HMZe#UySFue{)1?At8L!~~b@%$usHKN3Tp9SAJH2GioPPhcceP9tMATDH{nLSO; zt>6tk=ePc*Co>I0Mm4Z2xNT{L7CGw9< zXA-_DEv-UTkp7bbo~ZU5`pMY=q#!^FR0e%rqU|-ri?ZFq0l9@-cU?*gVsZ9$b zKs$KFsaI;j-qgE=skf*+IioB+dg!V}D^n(r4{+~0zy%}cf-yAd;l%12Gh8^5wdv6* z7x2KiYycP3Q*l97gU7n^mNPEwz4zAmvyU<&-2P~L|N8H4-+1%Z-$#2tzh19lF7GzA zvYV@>OEam^59bqD4LfK42ZJEzoM@tlk=3!AXx?adwwwCBCp9=wqhUuF8HGOk{5KDn#vd8#eoDgGXkzazcazQ%HDPZ z3b|Iqdro<}b*SW^{5y!2MQf_KCuIO_u0^Lv#PQU*m zckRW>gsG2Y_njy9|6ZkDD{%|f5JlH*N3;uv?urH1-ROEjUN&2R{){yDQj_vj{UK{lxkM}v(Z)ox&GO-LrM$#tPp!Zof`>4ZUI!F1i?7Q! pS`>3B5WZJmbAH zj;khDcD&?N2uxZ*?AV(;cB6sb|qxidEQcHsxqBcHi1 z=bn3g?s=Yb?9R^S34C7u6Ok?^wVfvb4Ta2C~72^)rBGrl_v?u&pBDB+foK&dL&brBR z4d*65;}bjuqaLv-Beue*tulqF>%`VPooQDn)0wfZ*+$kgnFT#F>*ZJ;dX|@ie&LG3 zX4owB^Q;J`nG-W-ykt3IPkAo$cpJPxh8rIbKX~WP%a^uqU*EZLb8vZM@T>Q0bmCap z#*_UBzVT5I3F*^B*`z(QH%uQUU z#iN=R$BWs68rAJ6yT_RLI&w=cbpBjCWnVS^apnDE96RUHLl2wh8?om0PV^)&y zX=`#f)VxTZCC`D~RqcWn=F|5QS%MJ=v0QrXulILu-<}Mu;l`_<-uz2B*DjtpeA;)xz*z1c=6B{uJz8jgnKivs@{( zsO5!1!-Lwn*sXgq?6`%?G(W6IBKPErHLsQUQ7Ci|cN1!y@GYIsf{tBCf0fe$>OwZ^X(L@$ z0GgV)qGUv)cRm54(Sc}|G9sEyte*Zk(Xe9LXLGp5K+-9#>KF8L*;|rM2}ozYeINYL zonL$i+3U`)*9X_ux8HgtWuL)ozueh;U+kA89@pFevO`J>v0a!LY~$@iWDPG}-@dgq zeEV;yk%}rbOh}@@ytu~w6x&8>K`b==I0i0p&bvX|bC%tZ1=t;JaA1#lNV^E}?OT5x zT>InTrCaGf;!xC10v`^?Ac578LB>`}(^FAuU~uc=;G>Vg!0& z3&U*z6*B%+R7L)i`g{Re6+@C8e;C?!ahJ#EMh+QyXm0e|vH^~G*o6~u2sVZ+J8|fd zZyo{d_;C=(k(zptYVaud{KqoCtOpu*EgcXB>x{eIht5o<=P%q2d|OBlDytMe4;$AM z+&=pt_z=}Hb1y;R)cpqom@(xE^8B$?$kGt#P(1Zi1j~8C^VsjHhe(eeBC84|E03<^ z0f@9s_-V-Tu;M)^DzB=T<-rmFb;6Q5Vd-hG1Q5+ELH1s`u<|rO4=rtsE$Nvh15Tp# zDTC5+6G#ABkma*hz5!>GV})^RMz(OpEVfwjikX5^Rt(x(wDa%;l=Z&BN{KlI<@vPF z_Kl%TGbqcqgfbS)``Z`+DU^qAzB;)6Zi?gWch`4r{C4~1E1&*-bNKTc)r!i$28#S4 z6ki9?HPbsRcki3182oTL&vzH5C&yHp(K$bc_OQqmMC4lB4{Lra@I>Ba=iM-I8(u7C znIBJd?*S|Xe$9O$@SM2jg)aA_Smei+iW+)adsZ%`-rlBMrKmXS5TC=4Q5+6u! zF{^6+cb96Vz)jdg ziQ@B&EL*bp!txuph|!4_)fCGp&KFC?Z*rWeGm?HrQ6AQe+ fe?b@Rh9eW|aQ|)DI+Z diff --git a/data_process/data_cleaner.py b/data_process/data_cleaner.py deleted file mode 100644 index 58532e1..0000000 --- a/data_process/data_cleaner.py +++ /dev/null @@ -1,66 +0,0 @@ -from .data_processor import DataProcessor -import pandas as pd -import numpy as np -from typing import Dict, List -from sklearn.impute import SimpleImputer -from sklearn.ensemble import IsolationForest -from scipy import stats - -class DataCleaner(DataProcessor): - """数据清洗类""" - - def __init__(self, config: Dict = None): - super().__init__(config) - self.missing_value_methods = { - 'mean': SimpleImputer(strategy='mean'), - 'median': SimpleImputer(strategy='median'), - 'mode': SimpleImputer(strategy='most_frequent'), - 'constant': SimpleImputer(strategy='constant') - } - - def handle_missing_values(self, df: pd.DataFrame, method: str = 'mean', columns: List[str] = None) -> pd.DataFrame: - """处理缺失值""" - try: - if columns is None: - columns = df.select_dtypes(include=[np.number]).columns - - if method not in self.missing_value_methods: - raise ValueError(f"Unsupported method: {method}") - - imputer = self.missing_value_methods[method] - df[columns] = imputer.fit_transform(df[columns]) - - self.logger.info(f"Successfully handled missing values using {method} method") - return df - - except Exception as e: - self.logger.error(f"Error handling missing values: {str(e)}") - raise - - def remove_duplicates(self, df: pd.DataFrame, subset: List[str] = None) -> pd.DataFrame: - """删除重复值""" - try: - original_shape = df.shape - df = df.drop_duplicates(subset=subset) - self.logger.info(f"Removed {original_shape[0] - df.shape[0]} duplicate rows") - return df - except Exception as e: - self.logger.error(f"Error removing duplicates: {str(e)}") - raise - - def detect_outliers(self, df: pd.DataFrame, method: str = 'zscore', threshold: float = 3) -> pd.DataFrame: - """检测异常值""" - try: - if method == 'zscore': - z_scores = np.abs(stats.zscore(df.select_dtypes(include=[np.number]))) - outliers = (z_scores > threshold).any(axis=1) - elif method == 'isolation_forest': - iso_forest = IsolationForest(contamination=0.1, random_state=42) - outliers = iso_forest.fit_predict(df.select_dtypes(include=[np.number])) == -1 - - self.logger.info(f"Detected {sum(outliers)} outliers using {method} method") - return df[~outliers] - - except Exception as e: - self.logger.error(f"Error detecting outliers: {str(e)}") - raise \ No newline at end of file diff --git a/data_process/data_processor.py b/data_process/data_processor.py deleted file mode 100644 index f5ed18c..0000000 --- a/data_process/data_processor.py +++ /dev/null @@ -1,63 +0,0 @@ -import pandas as pd -import numpy as np -from typing import Dict, List, Union, Optional -from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler -from sklearn.impute import SimpleImputer -from sklearn.model_selection import train_test_split -import logging -import hashlib -from datetime import datetime -import json -import os - -class DataProcessor: - """数据处理基类""" - - def __init__(self, config: Dict = None): - self.config = config or {} - self.logger = logging.getLogger(__name__) - self._setup_logging() - - def _setup_logging(self): - """设置日志""" - logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' - ) - - def load_data(self, file_path: str) -> pd.DataFrame: - """加载数据""" - try: - file_type = file_path.split('.')[-1].lower() - if file_type == 'csv': - df = pd.read_csv(file_path, **self.config.get('csv_params', {})) - elif file_type == 'parquet': - df = pd.read_parquet(file_path) - elif file_type == 'hdf5': - df = pd.read_hdf(file_path) - else: - raise ValueError(f"Unsupported file type: {file_type}") - - self.logger.info(f"Successfully loaded data from {file_path}") - return df - - except Exception as e: - self.logger.error(f"Error loading data: {str(e)}") - raise - - def save_data(self, df: pd.DataFrame, file_path: str): - """保存数据""" - try: - file_type = file_path.split('.')[-1].lower() - if file_type == 'csv': - df.to_csv(file_path, index=False) - elif file_type == 'parquet': - df.to_parquet(file_path) - elif file_type == 'hdf5': - df.to_hdf(file_path, key='data') - - self.logger.info(f"Successfully saved data to {file_path}") - - except Exception as e: - self.logger.error(f"Error saving data: {str(e)}") - raise \ No newline at end of file diff --git a/data_process/data_splitter.py b/data_process/data_splitter.py deleted file mode 100644 index 0018422..0000000 --- a/data_process/data_splitter.py +++ /dev/null @@ -1,49 +0,0 @@ -from .data_processor import DataProcessor -import pandas as pd -from typing import Dict, Tuple -from sklearn.model_selection import train_test_split - -class DataSplitter(DataProcessor): - """数据集划分类""" - - def __init__(self, config: Dict = None): - super().__init__(config) - - def train_val_test_split( - self, - df: pd.DataFrame, - target: str, - test_size: float = 0.2, - val_size: float = 0.2, - random_state: int = 42 - ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - """划分训练集、验证集和测试集""" - try: - # 首先划分训练集和测试集 - train_val, test = train_test_split( - df, - test_size=test_size, - random_state=random_state, - stratify=df[target] if df[target].dtype == 'object' else None - ) - - # 再划分训练集和验证集 - train, val = train_test_split( - train_val, - test_size=val_size, - random_state=random_state, - stratify=train_val[target] if train_val[target].dtype == 'object' else None - ) - - self.logger.info(f""" - Data split complete: - - Training set: {train.shape[0]} samples - - Validation set: {val.shape[0]} samples - - Test set: {test.shape[0]} samples - """) - - return train, val, test - - except Exception as e: - self.logger.error(f"Error splitting data: {str(e)}") - raise \ No newline at end of file diff --git a/data_process/feature_engineer.py b/data_process/feature_engineer.py deleted file mode 100644 index 57105e8..0000000 --- a/data_process/feature_engineer.py +++ /dev/null @@ -1,77 +0,0 @@ -from .data_processor import DataProcessor -import pandas as pd -import numpy as np -from typing import Dict, List -from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler -from sklearn.feature_selection import SelectKBest, chi2, f_classif - -class FeatureEngineer(DataProcessor): - """特征工程类""" - - def __init__(self, config: Dict = None): - super().__init__(config) - self.scalers = { - 'standard': StandardScaler(), - 'minmax': MinMaxScaler(), - 'robust': RobustScaler() - } - - def scale_features(self, df: pd.DataFrame, method: str = 'standard', columns: List[str] = None) -> pd.DataFrame: - """特征缩放""" - try: - if columns is None: - columns = df.select_dtypes(include=[np.number]).columns - - if method not in self.scalers: - raise ValueError(f"Unsupported scaling method: {method}") - - scaler = self.scalers[method] - df[columns] = scaler.fit_transform(df[columns]) - - self.logger.info(f"Successfully scaled features using {method} method") - return df - - except Exception as e: - self.logger.error(f"Error scaling features: {str(e)}") - raise - - def select_features(self, df: pd.DataFrame, target: str, method: str = 'chi2', k: int = 10) -> pd.DataFrame: - """特征选择""" - try: - X = df.drop(columns=[target]) - y = df[target] - - if method == 'chi2': - # 要求输入x不能为负的 - selector = SelectKBest(chi2, k=k) - elif method == 'f_classif': - selector = SelectKBest(f_classif, k=k) - else: - raise ValueError(f"Unsupported feature selection method: {method}") - - X_selected = selector.fit_transform(X, y) - selected_features = X.columns[selector.get_support()].tolist() - - self.logger.info(f"Selected {len(selected_features)} features") - return df[selected_features + [target]] - - except Exception as e: - self.logger.error(f"Error selecting features: {str(e)}") - raise - - def create_datetime_features(self, df: pd.DataFrame, datetime_column: str) -> pd.DataFrame: - """创建时间特征""" - try: - df[datetime_column] = pd.to_datetime(df[datetime_column]) - df[f'{datetime_column}_year'] = df[datetime_column].dt.year - df[f'{datetime_column}_month'] = df[datetime_column].dt.month - df[f'{datetime_column}_day'] = df[datetime_column].dt.day - df[f'{datetime_column}_weekday'] = df[datetime_column].dt.weekday - df[f'{datetime_column}_is_weekend'] = df[datetime_column].dt.weekday.isin([5, 6]) - - self.logger.info(f"Created datetime features from {datetime_column}") - return df - - except Exception as e: - self.logger.error(f"Error creating datetime features: {str(e)}") - raise \ No newline at end of file diff --git a/data_process/test_data_processor.py b/data_process/test_data_processor.py deleted file mode 100644 index 317c35b..0000000 --- a/data_process/test_data_processor.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -import pandas as pd -import numpy as np -from data_processor import DataProcessor -from data_cleaner import DataCleaner -from feature_engineer import FeatureEngineer -from data_splitter import DataSplitter - -class TestDataProcessor(unittest.TestCase): - def setUp(self): - # 创建测试数据 - self.test_data = pd.DataFrame({ - 'feature1': [1, 2, np.nan, 4, 5], - 'feature2': ['A', 'B', 'A', 'B', 'C'], - 'target': [0, 1, 0, 1, 0] - }) - - def test_data_cleaner(self): - cleaner = DataCleaner() - cleaned_data = cleaner.handle_missing_values(self.test_data.copy()) - self.assertFalse(cleaned_data.isnull().any().any()) - - def test_feature_engineer(self): - engineer = FeatureEngineer() - scaled_data = engineer.scale_features(self.test_data.copy()) - self.assertTrue('feature1' in scaled_data.columns) - - def test_data_splitter(self): - splitter = DataSplitter() - train, val, test = splitter.train_val_test_split(self.test_data.copy(), 'target') - self.assertEqual(len(train) + len(val) + len(test), len(self.test_data)) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/data_process/ttt.py b/data_process/ttt.py deleted file mode 100644 index 3d0bf95..0000000 --- a/data_process/ttt.py +++ /dev/null @@ -1,2 +0,0 @@ -from data_processor import DataProcessor - diff --git a/doc/接口文档code.md b/doc/接口文档code.md index 84c631c..8daf658 100644 --- a/doc/接口文档code.md +++ b/doc/接口文档code.md @@ -914,6 +914,155 @@ MLPlatform/ - 资源使用预警 - 自动清理机制 +## 5. 前端设计 +### 5.1 技术栈 +- Vue3: 前端框架 +- TypeScript: 编程语言 +- Element Plus: UI组件库 +- Axios: HTTP请求库 +- ECharts: 数据可视化库 +- Pinia: 状态管理 +- Vue Router: 路由管理 + +### 5.2 目录结构 +``` +frontend/ +├── src/ +│ ├── api/ # API接口封装 +│ │ ├── data.ts # 数据处理相关接口 +│ │ ├── model.ts # 模型管理相关接口 +│ │ └── system.ts # 系统监控相关接口 +│ ├── components/ # 公共组件 +│ │ ├── DataTable/ # 数据表格组件 +│ │ ├── ModelCard/ # 模型卡片组件 +│ │ └── Charts/ # 图表组件 +│ ├── views/ # 页面组件 +│ │ ├── data/ # 数据处理相关页面 +│ │ ├── model/ # 模型管理相关页面 +│ │ └── system/ # 系统监控相关页面 +│ ├── store/ # 状态管理 +│ ├── router/ # 路由配置 +│ └── utils/ # 工具函数 +├── public/ # 静态资源 +└── package.json # 项目配置 +``` + +### 5.3 页面设计 +1. 数据处理模块 + - 数据集列表页 + - 展示所有可用数据集 + - 支持数据集预览和基本统计信息 + - 数据集处理状态追踪 + - 数据预处理页 + - 预处理方法选择和配置 + - 参数可视化调整 + - 处理进度实时展示 + - 特征工程页 + - 特征工程方法选择 + - 特征重要性可视化 + - 数据分布展示 + +2. 模型管理模块 + - 模型列表页 + - 展示可用算法和模型 + - 模型详细信息查看 + - 模型对比功能 + - 模型训练页 + - 训练参数配置 + - 训练过程监控 + - 训练结果可视化 + - 模型评估页 + - 多指标评估结果 + - 预测结果分析 + - 模型解释性展示 + +3. 系统监控模块 + - 资源监控页 + - CPU/GPU使用率图表 + - 内存使用情况 + - 系统负载监控 + - 训练历史页 + - 实验记录列表 + - 训练详情查看 + - 实验对比分析 + - 日志查看页 + - 日志实时展示 + - 日志级别筛选 + - 日志搜索功能 + +### 5.4 交互设计 +1. 数据处理流程 + ```mermaid + graph LR + A[上传数据] --> B[数据预览] + B --> C[预处理配置] + C --> D[特征工程] + D --> E[数据划分] + E --> F[处理完成] + ``` + +2. 模型训练流程 + ```mermaid + graph LR + A[选择数据] --> B[选择算法] + B --> C[参数配置] + C --> D[开始训练] + D --> E[监控进度] + E --> F[查看结果] + ``` + +### 5.5 组件设计 +1. 通用组件 + - 数据表格组件 + - 图表展示组件 + - 参数配置表单 + - 进度展示组件 + - 文件上传组件 + +2. 业务组件 + - 数据预处理配置组件 + - 模型训练配置组件 + - 评估结果展示组件 + - 系统监控面板组件 + +### 5.6 状态管理 +1. 全局状态 + - 用户配置信息 + - 系统运行状态 + - 全局加载状态 + +2. 模块状态 + - 数据处理状态 + - 模型训练状态 + - 系统监控数据 + +### 5.7 性能优化 +1. 数据处理 + - 大数据分页加载 + - 数据缓存机制 + - 延迟加载策略 + +2. 交互优化 + - 防抖和节流 + - 骨架屏加载 + - 虚拟滚动列表 + +3. 可视化优化 + - 图表按需渲染 + - 数据分片处理 + - WebWorker处理大数据 + +### 5.8 错误处理 +1. 全局错误处理 + - API请求错误 + - 组件渲染错误 + - 路由错误处理 + +2. 用户提示 + - 操作成功提示 + - 错误信息展示 + - 加载状态反馈 + ## 附录A:方法详细说明 ### A1. 数据预处理方法 diff --git a/test_data_processor.py b/test_data_processor.py deleted file mode 100644 index cdde864..0000000 --- a/test_data_processor.py +++ /dev/null @@ -1,93 +0,0 @@ -import unittest -import pandas as pd -import numpy as np -from pathlib import Path -from function.data_processor_date import DataProcessor - -class TestDataProcessor(unittest.TestCase): - def setUp(self): - self.processor = DataProcessor() - - # 创建测试数据 - self.test_data = pd.DataFrame({ - 'feature1': [1, 2, np.nan, 4, 5], - 'feature2': [10, 20, 30, 40, 50], - 'target': [0, 1, 0, 1, 0] - }) - - # 保存测试数据 - self.input_path = 'dataset/dataset_raw/test_data.csv' - Path(self.input_path).parent.mkdir(parents=True, exist_ok=True) - self.test_data.to_csv(self.input_path, index=False) - - # 设置输出目录 - self.output_dir = 'dataset/dataset_processed' - - def test_process_dataset(self): - # 定义处理方法 - cleaning_methods = [ - { - 'method_name': 'SimpleImputer', - 'params': {'strategy': 'mean'} - } - ] - - feature_methods = [ - { - 'method_name': 'StandardScaler', - 'params': {} - } - ] - - split_params = { - 'test_size': 0.2, - 'val_size': 0.2 - } - - # 处理数据集 - result = self.processor.process_dataset( - self.input_path, - self.output_dir, - cleaning_methods, - feature_methods, - split_params - ) - - # 验证结果 - self.assertEqual(result['status'], 'success') - self.assertIn('process_record', result) - - # 验证输出文件 - record = result['process_record'] - self.assertTrue(Path(record['output_files']['train']).exists()) - self.assertTrue(Path(record['output_files']['validation']).exists()) - self.assertTrue(Path(record['output_files']['test']).exists()) - - def test_invalid_method(self): - # 测试无效的方法名 - cleaning_methods = [ - { - 'method_name': 'InvalidMethod', - 'params': {} - } - ] - - result = self.processor.process_dataset( - self.input_path, - self.output_dir, - cleaning_methods, - [], - {'test_size': 0.2, 'val_size': 0.2} - ) - - self.assertEqual(result['status'], 'error') - - def tearDown(self): - # 清理测试文件 - try: - Path(self.input_path).unlink() - except: - pass - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/test_method_reader.py b/test_method_reader.py deleted file mode 100644 index 9f7140e..0000000 --- a/test_method_reader.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest -from function.method_reader_date_process import MethodReader - -class TestMethodReader(unittest.TestCase): - def setUp(self): - self.reader = MethodReader() - - def test_get_preprocessing_methods(self): - result = self.reader.get_preprocessing_methods() - self.assertEqual(result['status'], 'success') - self.assertIsInstance(result['methods'], list) - - # 检查返回的方法列表 - methods = result['methods'] - self.assertTrue(any(m['name'] == 'data_scaler' for m in methods)) - self.assertTrue(any(m['name'] == 'missing_value_handler' for m in methods)) - self.assertTrue(any(m['name'] == 'outlier_detector' for m in methods)) - - def test_get_method_details(self): - # 测试获取StandardScaler的详细信息 - result = self.reader.get_method_details('StandardScaler') - self.assertEqual(result['status'], 'success') - self.assertEqual(result['method']['name'], 'StandardScaler') - - # 检查返回的详细信息字段 - method = result['method'] - self.assertIn('description', method) - self.assertIn('principle', method) - self.assertIn('advantages', method) - self.assertIn('disadvantages', method) - self.assertIn('applicable_scenarios', method) - self.assertIn('parameters', method) - - # 检查参数信息 - parameters = method['parameters'] - self.assertIsInstance(parameters, list) - if parameters: - param = parameters[0] - self.assertIn('name', param) - self.assertIn('type', param) - self.assertIn('default', param) - self.assertIn('description', param) - - # 测试获取不存在的方法 - result = self.reader.get_method_details('NonExistentMethod') - self.assertEqual(result['status'], 'error') - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/test_model_manager.py b/test_model_manager.py deleted file mode 100644 index 47b6471..0000000 --- a/test_model_manager.py +++ /dev/null @@ -1,99 +0,0 @@ -import pytest -import pandas as pd -import numpy as np -import mlflow -from pathlib import Path -from function.model_manager import ModelManager - -class TestModelManager: - @pytest.fixture - def model_manager(self): - return ModelManager() - - @pytest.fixture - def sample_data(self): - # 创建测试数据 - np.random.seed(42) - n_samples = 100 - X = np.random.randn(n_samples, 4) - y = (X[:, 0] + X[:, 1] > 0).astype(int) - - # 保存测试数据 - data_dir = Path("dataset/dataset_processed/test_data") - data_dir.mkdir(parents=True, exist_ok=True) - - df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(4)]) - df['label'] = y - - data_path = data_dir / "test_data.csv" - df.to_csv(data_path, index=False) - - return str(data_path) - - @pytest.fixture - def trained_model(self, sample_data): - # 训练一个简单的模型用于测试 - from sklearn.ensemble import RandomForestClassifier - - # 加载数据 - data = pd.read_csv(sample_data) - X = data.drop('label', axis=1).values - y = data['label'].values - - # 训练模型 - model = RandomForestClassifier(n_estimators=10, random_state=42) - model.fit(X, y) - - # 使用MLflow记录模型 - with mlflow.start_run() as run: - mlflow.sklearn.log_model(model, "model") - mlflow.log_param("algorithm", "RandomForestClassifier") - - return run.info.run_id - - def test_predict(self, model_manager, sample_data, trained_model): - # 设置输出路径 - output_dir = Path("predictions/test") - output_dir.mkdir(parents=True, exist_ok=True) - output_path = str(output_dir / "test_predictions.csv") - - # 执行预测 - result = model_manager.predict( - run_id=trained_model, - data_path=sample_data, - output_path=output_path, - metrics=['accuracy', 'f1'] - ) - - # 验证结果 - assert result['status'] == 'success' - assert 'prediction' in result - assert Path(result['prediction']['output_file']).exists() - assert result['prediction']['samples_count'] == 100 - assert 'accuracy' in result['prediction']['metrics'] - assert 'f1' in result['prediction']['metrics'] - - # 验证预测结果格式 - predictions = pd.read_csv(output_path) - assert 'prediction' in predictions.columns - assert len(predictions) == 100 - - def test_predict_invalid_run_id(self, model_manager, sample_data): - result = model_manager.predict( - run_id="invalid_run_id", - data_path=sample_data, - output_path="predictions/test/invalid.csv" - ) - - assert result['status'] == 'error' - assert '未找到运行ID' in result['message'] - - def test_predict_invalid_data_path(self, model_manager, trained_model): - result = model_manager.predict( - run_id=trained_model, - data_path="invalid/path/data.csv", - output_path="predictions/test/invalid.csv" - ) - - assert result['status'] == 'error' - assert '数据加载失败' in result['message'] \ No newline at end of file diff --git a/test_model_trainer.py b/test_model_trainer.py deleted file mode 100644 index efb19c2..0000000 --- a/test_model_trainer.py +++ /dev/null @@ -1,85 +0,0 @@ -import unittest -import numpy as np -from function.model_trainer import ModelTrainer - -class TestModelTrainer(unittest.TestCase): - def setUp(self): - self.trainer = ModelTrainer() - - # 创建测试数据 - np.random.seed(42) - self.X_train = np.random.randn(100, 5) - self.y_train = np.random.randint(0, 2, 100) - self.X_val = np.random.randn(30, 5) - self.y_val = np.random.randint(0, 2, 30) - - def test_train_model(self): - # 准备训练数据 - train_data = { - 'features': self.X_train, - 'labels': self.y_train - } - - val_data = { - 'features': self.X_val, - 'labels': self.y_val - } - - # 模型配置 - model_config = { - 'algorithm': 'LogisticRegression', - 'task_type': 'classification', - 'params': { - 'random_state': 42 - } - } - - # 训练模型 - result = self.trainer.train_model( - train_data, - val_data, - model_config, - 'test_experiment' - ) - - # 验证结果 - self.assertEqual(result['status'], 'success') - self.assertIn('run_id', result) - self.assertIn('metrics', result) - - # 验证指标 - metrics = result['metrics'] - self.assertIn('accuracy', metrics) - self.assertIn('precision', metrics) - self.assertIn('recall', metrics) - self.assertIn('f1', metrics) - - def test_invalid_algorithm(self): - # 测试无效的算法名 - train_data = { - 'features': self.X_train, - 'labels': self.y_train - } - - val_data = { - 'features': self.X_val, - 'labels': self.y_val - } - - model_config = { - 'algorithm': 'InvalidAlgorithm', - 'task_type': 'classification', - 'params': {} - } - - result = self.trainer.train_model( - train_data, - val_data, - model_config, - 'test_experiment' - ) - - self.assertEqual(result['status'], 'error') - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/test_system_monitor.py b/test_system_monitor.py deleted file mode 100644 index b591fcb..0000000 --- a/test_system_monitor.py +++ /dev/null @@ -1,86 +0,0 @@ -import pytest -from function.system_monitor import SystemMonitor -from typing import Dict - -class TestSystemMonitor: - @pytest.fixture - def system_monitor(self): - return SystemMonitor() - - def test_get_system_resources(self, system_monitor): - """测试获取系统资源信息""" - result = system_monitor.get_system_resources() - - # 验证返回格式 - assert isinstance(result, dict) - assert 'status' in result - assert result['status'] == 'success' - assert 'resources' in result - assert 'timestamp' in result - - resources = result['resources'] - - # 验证GPU信息 - assert 'gpu' in resources - if resources['gpu']: # 如果有GPU - gpu = resources['gpu'][0] - assert 'id' in gpu - assert 'name' in gpu - assert 'memory' in gpu - assert 'utilization' in gpu - assert 'temperature' in gpu - - # 验证CPU信息 - assert 'cpu' in resources - cpu = resources['cpu'] - assert 'count' in cpu - assert 'utilization' in cpu - assert 'memory' in cpu - assert 'swap' in cpu - - # 验证内存信息 - memory = cpu['memory'] - assert 'total' in memory - assert 'used' in memory - assert 'free' in memory - assert 'percent' in memory - assert memory['total'] > 0 - assert 0 <= memory['percent'] <= 100 - - # 验证磁盘信息 - assert 'disk' in resources - assert len(resources['disk']) > 0 - for mount_point, disk_info in resources['disk'].items(): - assert 'total' in disk_info - assert 'used' in disk_info - assert 'free' in disk_info - assert 'percent' in disk_info - assert disk_info['total'] > 0 - assert 0 <= disk_info['percent'] <= 100 - - # 验证进程信息 - assert 'processes' in resources - processes = resources['processes'] - assert 'total' in processes - assert 'running' in processes - assert 'sleeping' in processes - assert processes['total'] > 0 - assert processes['running'] >= 0 - assert processes['sleeping'] >= 0 - - def test_error_handling(self, system_monitor, monkeypatch): - """测试错误处理""" - def mock_gpu_error(*args, **kwargs): - raise Exception("GPU query failed") - - # 模拟GPU查询错误 - monkeypatch.setattr(system_monitor, '_get_gpu_info', mock_gpu_error) - - result = system_monitor.get_system_resources() - assert result['status'] == 'success' # 即使GPU查询失败,其他资源信息仍应返回 - assert result['resources']['gpu'] == [] # GPU信息应为空列表 - - # 验证其他资源信息仍然可用 - assert 'cpu' in result['resources'] - assert 'disk' in result['resources'] - assert 'processes' in result['resources'] \ No newline at end of file diff --git a/处理乳腺癌数据集.py b/处理乳腺癌数据集.py deleted file mode 100644 index a84aa3b..0000000 --- a/处理乳腺癌数据集.py +++ /dev/null @@ -1,6 +0,0 @@ -import pandas as pd -from sklearn.datasets import load_breast_cancer -cancer = load_breast_cancer() -df = pd.DataFrame(cancer.data, columns=cancer.feature_names) -df['target'] = cancer.target -df.to_csv('./dataset/breast_cancer.csv', index=False)