From b00e6dac71302d03efc383fde9d7ec0eeb450b60 Mon Sep 17 00:00:00 2001 From: haotian <2421912570@qq.com> Date: Thu, 20 Feb 2025 11:46:24 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90--=E5=AE=8C=E6=88=90=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E7=B3=BB=E7=BB=9F=E8=AE=AD=E7=BB=83=E5=8E=86=E5=8F=B2?= =?UTF-8?q?=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/接口文档code.md | 56 ++++++- example_system_moniter_train_history.py | 4 + .../__pycache__/system_monitor.cpython-39.pyc | Bin 5500 -> 8463 bytes function/system_monitor.py | 139 ++++++++++++++++++ 4 files changed, 191 insertions(+), 8 deletions(-) create mode 100644 example_system_moniter_train_history.py diff --git a/doc/接口文档code.md b/doc/接口文档code.md index 7774a2c..e4832e7 100644 --- a/doc/接口文档code.md +++ b/doc/接口文档code.md @@ -644,25 +644,64 @@ Error Response: ### 3.2 获取训练历史 ```http -GET /api/system/history +GET /api/system/history?page=1&page_size=10&start_time=2025-02-01&end_time=2025-02-19&status=completed&experiment_name=breast_cancer_classification + +Parameters: +- page: 页码 (默认: 1) +- page_size: 每页数量 (默认: 10) +- start_time: 开始时间 (可选, 格式: YYYY-MM-DD) +- end_time: 结束时间 (可选, 格式: YYYY-MM-DD) +- status: 运行状态过滤 (可选: completed, failed, running) +- experiment_name: 实验名称过滤 (可选) + Response: { "status": "success", "history": [ { - "task_id": "train_20230820_001", - "model": "xgboost", + "run_id": "7970364d490f4e0aa0375c2db26215f3", + "experiment_id": "656341556838275234", + "experiment_name": "breast_cancer_classification", + "model_name": "XGBClassifier", "dataset": "breast_cancer", - "start_time": "2023-08-20T10:00:00", - "end_time": "2023-08-20T10:30:00", + "start_time": "2025-02-19T08:43:02", + "end_time": "2025-02-19T08:43:05", + "duration": "3s", "status": "completed", + "parameters": { + "max_depth": 6, + "learning_rate": 0.1, + "n_estimators": 100 + }, "metrics": { - "accuracy": 0.95, - "f1": 0.94 + "accuracy": 0.956, + "precision": 0.962, + "recall": 0.935, + "f1": 0.948 + }, + "tags": { + "version": "v1.0", + "author": "admin" } } - ] + ], + "pagination": { + "current_page": 1, + "page_size": 10, + "total_pages": 5, + "total_items": 42 + } +} + +Error Response: +{ + "status": "error", + "message": "获取训练历史失败", + "details": { + "error_type": "MLflowError", + "error_message": "Failed to connect to MLflow server" + } } ``` @@ -1017,4 +1056,5 @@ Authorization: Bearer 3. 性能评估: - 使用多个评估指标 - 考虑模型的稳定性 + - 关注模型在特定场景的表现 - 关注模型在特定场景的表现 \ No newline at end of file diff --git a/example_system_moniter_train_history.py b/example_system_moniter_train_history.py new file mode 100644 index 0000000..8b0cbda --- /dev/null +++ b/example_system_moniter_train_history.py @@ -0,0 +1,4 @@ +from function.system_monitor import SystemMonitor + +system_monitor = SystemMonitor() +print(system_monitor.get_training_history()) \ No newline at end of file diff --git a/function/__pycache__/system_monitor.cpython-39.pyc b/function/__pycache__/system_monitor.cpython-39.pyc index 3a42c0fb4df2ec47278deaf3a31f80ec0d068c57..12f1e797364af2ebf31e24311fa4573d230271db 100644 GIT binary patch literal 8463 zcmd5>TW}lKdEWajuB0f67hPe+jD^Ia)HY31R@V3;D^4rXYADyywhjW}tVmF}>FmN1 z(S^niWjT{rwlYoPc0|>HN-{<8}Z zpy;&IrvkHQ&;6YJ&-Y(XK9!0~_}%&bd$a%k+miIpROtPwC_ISA{W=mRF}Wn!;aj$4 zQC4gPZ>6M`Ra=#*O)Y6<-PX&7ZIn&h6m@dRvMsb}rC2#`$D_K0oe*;*?IhlMDOFC} z>8NeM9{i%j3}#-En7OFfTbMi}Wv$!Thcum)1B>N7$19edtQu%XiUlvwjul-mupX~@ z#cCyALLco!-kZZadAxL{R6YMlspwR^0?p9-i=pr!9`_w2jzr&uZ<1}5DNMa2*&5TB zj<@a@jyaA!Ey>Je)+Noh98;8HERIqvDkWGFrFc|Iu{27Fs5HO^QA$RoEo=y-R8-o^ zhEYnh9nJv$gU*(5X_M4sV@oC*XWK97_7EEdO?C#UnFZH#%EzmfqF3ehZTODWpWa-( z`O4b6msWm$b@ke}S6+O5_3fJl87-Y3<>`1I!Q*C-cv4IDl(y_kIn|e1T3eDOPhXT! zHU#mN`WU(`f8+Y{>)%>_;d|i{;pEGI^~QA849rq>cD7iVwPUl6cMN%l2Uadu$(Nm6 zE-dZ~dYNyKLjB*8WNOX9vQ#$je#M5Z~@bH1TYS}rEXXRpLKd)B3 z19SPRSIk!q96xrVl=se5dHKMZdZj>Tf4~*w$dyBSOx6}?6Gadwmm9=vu7N~SlCmW? zM*2uM+3D6tDWVb$5tRBWG9DOVQEtk~I(!AA*`Ji=_k9*2|f=bLJkk^w~WbMY^t-gO_<>enN-+H;BO@de_ z3HHtfpgdPSYY+5)Wv-E$I5kn8V7ZCMCXP?cG(I=6FYkgL;1D;nKQpn9P9e&coTr^q zPr2;4ZhjW6)7d1aPYv|)Sytqnc*9N{EtZ_e@)cHcI9I4utyIt3Nu1(Q&^_;Y4!2|7 zEKks&c%HFnjBTOkFI)P>NJ!zy<23>L`EMJv#Oi&N(Vlp|V<;BkkM zNQxyV6-^%gFHO~C3u#hr?A&mMedq$s3=X*QO5d5%N7BsVOn-??<1*pDEj8s?2`45W zk)EnFRpd3Cp5m+XDjp4wj>o`bGI@#WET#-gc*p3?)FtX0_Y%I!v_*w~6{TcFVY;X> zP*X!qifWjNz8bTZOgi~i~nFUr@X>jE>g z(vtw>DWI&WU({!!_d)3(a==f#PJ~-~@o$!2`ra2$Jn_M;_R6zAftb|2s+Y$Z)Ln-K z+8ORRw;4KSl|Y@X)dQpKl&gFph}Ej+9e#x8&rb(&kv~=}7rm?kNr*~YFNvZVC`C*| zDB?RXO^~R2#Zs}6Cl(A6kViF#=RtVK*2OHCm{$uxn&Z|-@800@Yj3ap><3es>`-7} z*O<#TAw`gN10!FnITgltV;J9q#7=~pJxO0=$HT%C_^wX=D84SxO4WR~hGyjV&_>O< zybI8BzLTcgMOCp!FBBZ1I0SkF9Z0Y8Ky^KyHA1v%ppk1^NFhofZ?@Cmy);;MLbwXx z66u3f4m;0vy55T_7upr*RUmUT#Zt7SCgTqz<9|mpEZO>4(==sB9>QDsm-w)uq~#I0 zv10?ad*|%~w1(+PmkU7u6EcnObO4RF(w15(gRq*F+YngefVd~k3(a;#7CD3ISD@FH zs9t9li(P?u<0l~E3>M!Z0kB{S{xy`%o^lOkt5aqPFoeRN@s)W)^p{$(HiTU(4uP0l zG9q{<7JdbIs}a1DtrSanXA>5z3RSQG4E2rPxj*WB~@S`VhXyXsZI>LgaR6O;uf6|gIQ09)V(DfulV!JvRy2z~)ds8#|9Z6^fjebkra zG2cf?7D-kUc;>&0JgA1*X?^|~YVQ}l5saw|A^ywg;S!cZ7>{UiIS#PKl|dQ*JDQ%9 z4cYj$HSmeR?ab5%To8U8xHS|S_jiB`aKRAUF5wOEiZ`^XcJM6LQK2`pfmtVuMc6_P+l%yes1~4e;JBQ*qVdVn;IwW!%1RsvVQ_I zI*mF>fV)R^&U`u>=bu9lJNG%UQ@n+`~X-O5sHai{u1q?ikCsK?M zEl`g1*FLfgaR$3L5gt8g`G0o|MBb2~AN>5al`F6I9tQsdqSGNH^~`VRb{F*%f*Jl# zr02gwOK!p#{P)qYfajA*ztJY?W6M6m>zJo2xZgsdQ7Xa;33&ik$XA+5;|nChAy!pb zlfn)WTmV}ndc!^kSCCBtuJC2*c?d%ETpHGd<;Bh@3rvPJFT-Xym!{_+9$$|5C%*7Q z@Q)VpPok9s|D=3vQ~uE|f@8oxcyG!-tKWZi^;*O~Jx8|AD!m=n4=WwA$^a|6XLGeY z_ljgixOQAr)X6x7W+yRRt3qaP#zVot35S=9u8V<>3hiw^J|i-la+e-z^_Wkes#E7>*j0K4s z>yfx?x2~V>%zp^<7QA1aWsdc>&|9rha_6xx6q zO~{YXmNpR>!4{#HyNhDEYv|h1rb-;JQSKOkiz3G%!H4h+#}j(6%<_`dHlGUnu-KBq;xvo!SrU;B7H@%w1Q3y|7qQWJCi;b1 zpf`x|!TYyXzw^e*;&aQF-|cqmX1**uJa?*FLG$zt7fw*O8AcF^BwK|^0=EWIM;v3F z9E_E3TwVU=E1T>#_5Efc5qPn7phu#*#yO-&r1nwSA{2?0TjJ8rcOlg$lc zZyyd)wnuFkx$VU$Ro6!c$sBD|e z@$$tI{Q5NiEVjm{D0zsI$0!k|;Ze$wU(dH90lRtp3~f6EJ=u(6TY*L^a?({D8}g5+ zjqo1CqJ6lt8;!_eq58szOqVV{#Tb`NQpr?+aq<7iFq10yKu+Ev`y~zZqXc1Ue5~DR z?Cs}(%_bER;Wq7$Vo_QRN`r*@tdnz#4JU}>7`T_CFGCom!bC1SumR@+z6=c&PXwzt zh0qEVm+Jh;=_Kn|xK|W6yQ(yn5FIdmo$vL{7C6b*DSmJb?#gw+3BJmYd5WjD5H)Da zA}XW~LxMLg(O>hejz8~f{OPDCIIgE>O!U4wQ^utu(pCAXdz;&^t1bREHZ)E# zvpDEBgwj?~Qjr@TM?Zc!sz(VFuspMw`Fp}XEDMSKnXbe(o4YRV zf({pNA+ae+`PsS$FS;vHBYAb>$E!CNmtVZR{L(vXH{M!)|GV7|-ONLLc7vcM9-YeE zZoj+QdbZn*BzTv3r!p%)c?lg>zWdhg=U?jUv(D91ndM(RgV^-S%kSNO`DdAZr|{c< z{P_MON3wk*I=nxXS-treE7xE7pZbSJ$5dwR*44H43#;FJZ{?Y1*KR$(a`UzRY5Qde zc5~yeZ~yRx<*Qd#Uw>9+&*(S(p${ z#YGTY`nGZoMxK!Th`XeEq-l{Myr@hbv{h7YR1NV%B?Pkz)pG4{3|RU#W|bPBzgy6I zJdfyoWi~g5?g&v%9x7E4%5e`P)0v<#&_jSDkIp=TXimCsBW{pFoEi3B4hoJgDXc1I zD4kxgP3|Ck#N5!P{8Ma$KS{|~XhSCaf_&MX45Wrtb}+QyPBwHeKZ_~@Hfj!G&{)1S zTjfP>u56o6RL&wiUI|RxP~{Pe=6{G;0(nvxw}QF|vY)2u#X_f$Bj1KOrBL@UYU(bJ zi>^AtsBmh;P&+PmgGd*51E_hi9cX6krv51m>@{BQhS2?)<>%h6$Ee@+cF3RnODJV= zCly7$;C{85Y+K{q?yKfetEt_pOwRw{ZuW|#;BiCEnqQzv51?4B@S_XJhY z!K{NL_Fu5PT=tjPZ{xL?tabO;3k)ExXv5n>1AYYLZ@6#q1!7Yq zmJnRUm3^srI=qF{gd0*rgFO)P_$0_!I7_2RhY{K;EbsCP3S!;OJ}9sbg%mwZVSjf7 SNf@C?!o|jYqBvrt%6|iB7dm7B delta 1495 zcmb7E&u<$=6y90y+Up;?wo^B@)uwe*ZD5-wfr3($G%X^cZG@akq$t7{v$EIjs$Sc? zT?45i0&3I?Qk!Tb5Vu64hl)c*4Mlj??J*`0^Y&&RfnqpYz#xthBq)zx}*Q>sUA%9T`|tw>gE`@tDj z+s+o*EuLn#)WbM8%70*sg#yM0(hdvcUA8V&BkR(RvMVu(Te8Vienp;5SxhLt(`ww4 zK<9Vma!SV-Um<2W^&oPBF^!Ka&*~z6u>+nR1P(xguPWne3S9($U&-ev1VS*#N+OK( z?x83N#5ehcRnK=Vpe~S}(wdn-a zC$1%Q^3Nbh{cdcNjn1I;D?`AtXl@&meaCLOJP|)RI*zlIcmtfbdrrHy2Ba5rS&Vy0 z+5lc4c#(e_pD3Rqlf=R#!D$3b38lhIXjilvDh(X33sbcI&+_dnBo|6%nNKHvPv*&w zP7{j!`Q!{c%k5;Yd|;NAUM3jE9T}7<;yK?fG`o8OAP*uq#3yD*Da-V)_ln z^8C}x@#H0v{2ak${(WW=YyVqj{`LQ0BDb*flrD5?uteh_>56PfNIs75hX?&he?9vF z%Tn)FB6#{0h2((A^+PlH8(H(sDbt%W>sQUob7pzYT(Kgs#8(bo*q|QkQE%+s{`%3K zkN3XXeEiMUFW>Dx{_y_b=q$OC2ro@FSnE1MAJ`reV>xIWj1{r{v53{3z;;{yvSq+D zuD}ApMS?Ox*w?R-B@>KXK@=L^#zx@c?GEcg9Uu0-P>2%ZIDT>i$UiKJi0=#Wn+LA| ze=ApH_xX+7Rdv})L!56Ne`X_1LG-Qwy-W}xcq)$iH*BB-GFNHt>CFnQ@O6se0_EW6 z5HzO9nyMKi(?-#tFJ}~hJS;uzb(la~X#85q#CPy^sW4BA!}b;0Ij>XiwVd Dict: + """ + 获取训练历史记录 + + Args: + page: 页码 + page_size: 每页数量 + start_time: 开始时间 (YYYY-MM-DD) + end_time: 结束时间 (YYYY-MM-DD) + status: 运行状态过滤 + experiment_name: 实验名称过滤 + + Returns: + 训练历史记录 + """ + try: + # 构建过滤条件 + filter_string = [] + + if status: + filter_string.append(f"status = '{status.upper()}'") + + if start_time: + start_timestamp = int(pd.Timestamp(start_time).timestamp() * 1000) + filter_string.append(f"start_time >= {start_timestamp}") + + if end_time: + end_timestamp = int(pd.Timestamp(end_time).timestamp() * 1000) + filter_string.append(f"start_time <= {end_timestamp}") + + filter_string = " and ".join(filter_string) if filter_string else None + + # 获取实验ID + if experiment_name: + experiment = mlflow.get_experiment_by_name(experiment_name) + if experiment is None: + return { + 'status': 'error', + 'message': f'实验 {experiment_name} 不存在' + } + experiment_ids = [experiment.experiment_id] + else: + experiments = mlflow.search_experiments() + experiment_ids = [exp.experiment_id for exp in experiments] + + # 获取所有运行记录 + all_runs = [] + for exp_id in experiment_ids: + runs = mlflow.search_runs( + experiment_ids=[exp_id], + filter_string=filter_string, + order_by=['start_time DESC'] + ) + if not runs.empty: + all_runs.extend(runs.to_dict('records')) + + # 计算分页 + total_items = len(all_runs) + total_pages = (total_items + page_size - 1) // page_size + start_idx = (page - 1) * page_size + end_idx = min(start_idx + page_size, total_items) + + # 格式化运行记录 + history = [] + for run in all_runs[start_idx:end_idx]: + # 获取实验信息 + experiment = mlflow.get_experiment(run['experiment_id']) + + # 计算持续时间 + start_time = pd.to_datetime(run['start_time']) + end_time = pd.to_datetime(run['end_time']) if run['end_time'] else pd.Timestamp.now() + duration = end_time - start_time + + # 收集参数和指标 + params = {} + metrics = {} + tags = {} + for key, value in run.items(): + if key.startswith('params.'): + params[key.replace('params.', '')] = value + elif key.startswith('metrics.'): + metrics[key.replace('metrics.', '')] = value + elif key.startswith('tags.'): + tags[key.replace('tags.', '')] = value + + # 格式化记录 + history_item = { + 'run_id': run['run_id'], + 'experiment_id': run['experiment_id'], + 'experiment_name': experiment.name, + 'model_name': params.get('algorithm', 'Unknown'), + 'dataset': params.get('dataset', 'Unknown'), + 'start_time': start_time.strftime('%Y-%m-%dT%H:%M:%S'), + 'end_time': end_time.strftime('%Y-%m-%dT%H:%M:%S'), + 'duration': str(duration).split('.')[0], # 去除微秒部分 + 'status': run['status'], + 'parameters': params, + 'metrics': metrics, + 'tags': tags + } + history.append(history_item) + + self.logger.info(f"成功获取训练历史记录, 共{total_items}条记录") + + return { + 'status': 'success', + 'history': history, + 'pagination': { + 'current_page': page, + 'page_size': page_size, + 'total_pages': total_pages, + 'total_items': total_items + } + } + + except Exception as e: + error_msg = f"获取训练历史失败: {str(e)}" + self.logger.error(error_msg) + return { + 'status': 'error', + 'message': '获取训练历史失败', + 'details': { + 'error_type': type(e).__name__, + 'error_message': str(e) + } } \ No newline at end of file