From d13ce6c21af518fa677ba22a45d4f10dccf93ca7 Mon Sep 17 00:00:00 2001 From: haotian <2421912570@qq.com> Date: Mon, 24 Feb 2025 15:03:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9--=E4=BF=AE=E6=94=B9fastapi?= =?UTF-8?q?=E4=B8=AD=E7=9A=84=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- __pycache__/main.cpython-39.pyc | Bin 4599 -> 4565 bytes example_model_manager.py | 202 ++++++++++++++++---------------- main.py | 44 ++++--- model/model.yaml | 20 ++-- model/parameter.yaml | 34 +++--- 5 files changed, 156 insertions(+), 144 deletions(-) diff --git a/__pycache__/main.cpython-39.pyc b/__pycache__/main.cpython-39.pyc index afaf6e2c37e4f89fe9f7f372825c74d5e31ad0dd..dc0be9f48256d9d227915ac83bf4cd6ca6d84747 100644 GIT binary patch delta 1999 zcmZ`(TWl0n7@jjbdz*H*ExUBP?Y49&ZP^x<`@O9q$en@|FpihWu>a}qw6n9Dv$M3A zm?qK~jj5p-eGw|VK@%DiUrb083^6{N_@FQIKw<wjif1;Lq||NQfP|M}1P zFK2%4`K~9dg+j7`&(8Rb>}QeraDse0Kd8^-BT9q_AV9!~Dp8z+#`1it(ptx2O3dZj zls24YBc6{d@jBP8v?JDFbmS9Cr;9ZhU8t|9>)og)i9lnOmC#rvl^zM|h2?WTr4L#mHY+IowEw)ItfsBdhL$OsnizmMw9j!z zv57$(YoG%+SxZxppz%t&DnRE&3ECDs9YU2Lv#u&+Md+II)DA<}SBbKL_DyW0{u#Qu zvI$n`LRG}M80V}-IsnIlhWRwg&D8?>}Y9FJBkW(fCO zf4DB;*dq!;=_Pxi@#yK(0ud~sLO|wi2thndLwOi&mfpR5Xa0*j^H=X)y1evBb?L$z z_ins-_u|dl*RR}p=knc!kCxt_Em<0~%Efi1T-k!clzIE}w{Ks+cJIbpb~LnIb^>%- zFIj3~(ta+~AwQ7PMj^XMSgTzMB}ZP3jZt=%vTnZE(#>o)_mEDfQVkyxf;|`%LfkaJoa+3z5pn-~27}v9#5hyjH5QvCd zq)C`W2}|QQi=jBl+EChmdT)_!_ATG*p+U}UMUnEdVLR{N)bNmV`(uA1@dA=1_a7t% z&|4!8wg<6A5f}X6*U5l=Gyu(exub{Iei=x{&+xTw&>oS9A`asFUHXHK+P?;logP3o zC9?f|-j(lReF{hj{o`HCq@sB?swa^7Npy2JV=MTSNVP@zrK>0s*zC18igUmnuJpL5v<$1#+{ByupZ9@JwqI*e PBaOUTyk>vD;xB&z@*@#L delta 1988 zcmZ`(-ES0C6rVdY``O)X*>1Zn+pq1GkEKw)%6A0{QVSF$Qll=I$++jTopwK(yR*`_ zHByK{H)*|55-mwLF$fRF2cpp!Uo@KNlP|o?g9Ktr_zR4p#B=X%MM9m)J#*&wJNMjk z&OP_uN&KD&nZcl6!RPATZ!-78^Pvv%<9xqWPK7}MZ?-uXNkxc)x+mL`YLVa8R4aac z*=R1Biq^TdR2yRcY;Pi7RP^(+&mx_C&<{;R z|9Y#rOa|n09HI}PZ;S5nx49XR<*k8D)AGY`2S+B3hxOI zF3arFbQ=GjdUX8z9!GJEx9X=hVF&t4!u#?~-P6TbwA_P`H?XCIt;?nSxl!Ru;DrAXU61LB(velIY@k zHkWTdy!Kfwo8_-AKbX5xPi5HEK6Dz}N^bZ$v|lyQ&`JcgNEgtqsG?9bel9dfPVl)< zhl`2n{EJXO8RGXsi~Rk@@Mc*7k2s}PKEvA4K<+u)jhYiMr>y}nZPP%7HSu2>$JR2@ zr}BU@vHOmgYl!-t=dU&Om0uQE@T|?KnHx3pkfqG=WvD$S?*Yea72kIN)L?~@OYy Wg*E46elPKhp^G*b&A#%gfBXabmGxTy diff --git a/example_model_manager.py b/example_model_manager.py index f1f6ef0..e01b607 100644 --- a/example_model_manager.py +++ b/example_model_manager.py @@ -6,127 +6,127 @@ manager = ModelManager() -# 获取所有预处理方法 -print("--------------------------------------------获取预处理方法---------------------------------------------------") -methods = manager.get_models() -print("模型列表:") -print(methods) -print("--------------------------------------------获取预处理方法 end ---------------------------------------------------") +# # 获取所有预处理方法 +# print("--------------------------------------------获取预处理方法---------------------------------------------------") +# methods = manager.get_models() +# print("模型列表:") +# print(methods) +# print("--------------------------------------------获取预处理方法 end ---------------------------------------------------") print("--------------------------------------------获取方法详细信息---------------------------------------------------") # 获取特定方法的详细信息 -method_details = manager.get_model_details('SVC') -print("\nSVC方法详情:") +method_details = manager.get_model_details('LinearRegression') +print("\nLinearRegression方法详情:") print(method_details) print("--------------------------------------------获取方法详细信息 end ---------------------------------------------------") -print("--------------------------------------------评价指标 ---------------------------------------------------") -# 获取评价指标 -print(manager.get_metrics()) -print("--------------------------------------------评价指标 end ---------------------------------------------------") +# print("--------------------------------------------评价指标 ---------------------------------------------------") +# # 获取评价指标 +# print(manager.get_metrics()) +# print("--------------------------------------------评价指标 end ---------------------------------------------------") -print("--------------------------------------------获取所有已训练模型 ---------------------------------------------------") -# 获取所有已训练模型 -result = manager.get_finished_models( - page=1, - page_size=10, - experiment_name='breast_cancer_classification_3' -) +# print("--------------------------------------------获取所有已训练模型 ---------------------------------------------------") +# # 获取所有已训练模型 +# result = manager.get_finished_models( +# page=1, +# page_size=10, +# experiment_name='breast_cancer_classification_3' +# ) -# 打印结果 -print("\n已训练模型列表:") -print(f"状态: {result['status']}") -if result['status'] == 'success': - print(f"\n总数: {result['total_count']}") - print(f"当前页: {result['page']}") - print(f"每页数量: {result['page_size']}") - print("\n模型列表:") - for model in result['models']: - ''' - 'run_id': run['run_id'], - 'experiment_id': run['experiment_id'], - ''' - print(f"run_id", model['run_id']) - print(f"experiment_id", model['experiment_id']) - print(f"算法: {model['algorithm']}") - print(f"任务类型: {model['task_type']}") - print(f"数据集: {model['dataset']}") +# # 打印结果 +# print("\n已训练模型列表:") +# print(f"状态: {result['status']}") +# if result['status'] == 'success': +# print(f"\n总数: {result['total_count']}") +# print(f"当前页: {result['page']}") +# print(f"每页数量: {result['page_size']}") +# print("\n模型列表:") +# for model in result['models']: +# ''' +# 'run_id': run['run_id'], +# 'experiment_id': run['experiment_id'], +# ''' +# print(f"run_id", model['run_id']) +# print(f"experiment_id", model['experiment_id']) +# print(f"算法: {model['algorithm']}") +# print(f"任务类型: {model['task_type']}") +# print(f"数据集: {model['dataset']}") - print(f"训练开始时间: {model['training_start_time']}") - print(f"训练结束时间: {model['training_end_time']}") - print("模型参数:") - for k, v in model['parameters'].items(): - print(f" {k}: {v}") - print("评估指标:") - for metric_name, metric_value in model['metrics'].items(): - print(f" {metric_name}: {metric_value:.4f}") -else: - print(f"错误信息: {result['message']}") +# print(f"训练开始时间: {model['training_start_time']}") +# print(f"训练结束时间: {model['training_end_time']}") +# print("模型参数:") +# for k, v in model['parameters'].items(): +# print(f" {k}: {v}") +# print("评估指标:") +# for metric_name, metric_value in model['metrics'].items(): +# print(f" {metric_name}: {metric_value:.4f}") +# else: +# print(f"错误信息: {result['message']}") -print("--------------------------------------------获取所有已训练模型 end ---------------------------------------------------") +# print("--------------------------------------------获取所有已训练模型 end ---------------------------------------------------") -print("--------------------------------------------模型训练---------------------------------------------------") -# 加载数据 -train_data = pd.read_csv('/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/train_breast_cancer_20250219_144629.csv') -val_data = pd.read_csv('/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/val_breast_cancer_20250219_144629.csv') +# print("--------------------------------------------模型训练---------------------------------------------------") +# # 加载数据 +# train_data = pd.read_csv('/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/train_breast_cancer_20250219_144629.csv') +# val_data = pd.read_csv('/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629/val_breast_cancer_20250219_144629.csv') -# 准备特征和标签 -X_train = train_data.drop('target', axis=1) -y_train = train_data['target'] -X_val = val_data.drop('target', axis=1) -y_val = val_data['target'] +# # 准备特征和标签 +# X_train = train_data.drop('target', axis=1) +# y_train = train_data['target'] +# X_val = val_data.drop('target', axis=1) +# y_val = val_data['target'] -# 模型配置 -model_config = { - 'algorithm': 'XGBClassifier', - 'task_type': 'classification', - 'dataset' : '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629', - 'params': { - 'n_estimators': 100, - 'learning_rate': 0.1, - 'max_depth': 6, - 'random_state': 42 - } -} +# # 模型配置 +# model_config = { +# 'algorithm': 'XGBClassifier', +# 'task_type': 'classification', +# 'dataset' : '/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_144629', +# 'params': { +# 'n_estimators': 100, +# 'learning_rate': 0.1, +# 'max_depth': 6, +# 'random_state': 42 +# } +# } -# 训练模型, 删除训练实验时要删除 mlruns/.trash/ 回收站里的文件 -# 模型文件 直接在 mlruns/文件夹下 -for i in range(3, 4): - result = manager.train_model( - { - 'features': X_train, - 'labels': y_train - }, - { - 'features': X_val, - 'labels': y_val - }, - model_config, - f'breast_cancer_classification_{i}' - ) +# # 训练模型, 删除训练实验时要删除 mlruns/.trash/ 回收站里的文件 +# # 模型文件 直接在 mlruns/文件夹下 +# for i in range(3, 4): +# result = manager.train_model( +# { +# 'features': X_train, +# 'labels': y_train +# }, +# { +# 'features': X_val, +# 'labels': y_val +# }, +# model_config, +# f'breast_cancer_classification_{i}' +# ) -# 打印结果 -print("\n训练结果:") -print(f"状态: {result['status']}") -if result['status'] == 'success': - print(f"\nMLflow运行ID: {result['run_id']}") - print("\n评估指标:") - for metric_name, metric_value in result['metrics'].items(): - print(f"{metric_name}: {metric_value:.4f}") -else: - print(f"错误信息: {result['message']}") +# # 打印结果 +# print("\n训练结果:") +# print(f"状态: {result['status']}") +# if result['status'] == 'success': +# print(f"\nMLflow运行ID: {result['run_id']}") +# print("\n评估指标:") +# for metric_name, metric_value in result['metrics'].items(): +# print(f"{metric_name}: {metric_value:.4f}") +# else: +# print(f"错误信息: {result['message']}") -print("-------------------------------------------模型训练 end ---------------------------------------------------") +# print("-------------------------------------------模型训练 end ---------------------------------------------------") -print("--------------------------------------------模型预测 ---------------------------------------------------") -print(manager.predict(run_id = "33939ea6d8ce4d43a268f23f7361651e",\ - data_path="/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_145614/test_breast_cancer_20250219_145614.csv",\ - output_path="predictions/pred_breast_cancer_20250219_145614.csv" ,\ - metrics= ["accuracy", "f1", "precision", "recall"] )) +# print("--------------------------------------------模型预测 ---------------------------------------------------") +# print(manager.predict(run_id = "33939ea6d8ce4d43a268f23f7361651e",\ +# data_path="/home/admin-root/haotian/MLPlatform/dataset/dataset_processed/breast_cancer_20250219_145614/test_breast_cancer_20250219_145614.csv",\ +# output_path="predictions/pred_breast_cancer_20250219_145614.csv" ,\ +# metrics= ["accuracy", "f1", "precision", "recall"] )) -print("-------------------------------------------模型预测 end ---------------------------------------------------") \ No newline at end of file +# print("-------------------------------------------模型预测 end ---------------------------------------------------") \ No newline at end of file diff --git a/main.py b/main.py index 21c8136..210f61a 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ from fastapi.security import OAuth2PasswordBearer from fastapi.responses import JSONResponse from fastapi.exceptions import RequestValidationError from typing import Optional, Dict, List +from contextlib import asynccontextmanager import uvicorn from pathlib import Path import logging @@ -16,13 +17,35 @@ from api.data_api import router as data_router from api.model_api import router as model_router from api.system_api import router as system_router + +# 设置watchfiles 日志级别为warning +logging.getLogger("watchfiles").setLevel(logging.WARNING) + + +# 生命周期管理 +@asynccontextmanager +async def lifespan(app: FastAPI): + """生命周期管理(替代原来的startup/shutdown事件)""" + # 启动时的初始化操作 + logger.info("Server starting up...") + Path("dataset/dataset_raw").mkdir(parents=True, exist_ok=True) + Path("dataset/dataset_processed").mkdir(parents=True, exist_ok=True) + Path(".log").mkdir(exist_ok=True) + logger.info("Server started successfully") + + yield # 应用运行期间 + + # 关闭时的清理操作 + logger.info("Server shutting down...") + # 创建FastAPI应用 app = FastAPI( title="机器学习平台API", description="提供数据处理、模型训练和系统监控功能的API服务", version="1.0.0", docs_url="/docs", - redoc_url="/redoc" + redoc_url="/redoc", + lifespan=lifespan # 使用新的生命周期管理方式 ) # 加载配置 @@ -141,30 +164,17 @@ async def health_check(): "environment": config.get('environment', 'production') } -# 启动事件 -@app.on_event("startup") -async def startup_event(): - """服务启动时的初始化操作""" - logger.info("Server starting up...") - # 创建必要的目录 - Path("dataset/dataset_raw").mkdir(parents=True, exist_ok=True) - Path("dataset/dataset_processed").mkdir(parents=True, exist_ok=True) - Path(".log").mkdir(exist_ok=True) - logger.info("Server started successfully") -# 关闭事件 -@app.on_event("shutdown") -async def shutdown_event(): - """服务关闭时的清理操作""" - logger.info("Server shutting down...") + if __name__ == "__main__": uvicorn.run( "main:app", host=config.get('host', '0.0.0.0'), port=config.get('port', 8992), + # reload=True 支持热重载 reload=config.get('debug', True), workers=config.get('workers', 4), - log_level=config.get('log_level', 'info'), + log_level=config.get('log_level', 'warning'), access_log=True ) \ No newline at end of file diff --git a/model/model.yaml b/model/model.yaml index cfafca2..e641bb2 100644 --- a/model/model.yaml +++ b/model/model.yaml @@ -159,6 +159,7 @@ classification_algorithms: - "自然语言处理。" regression_algorithms: + LinearRegression: principle: "线性回归通过最小化数据点与回归线之间的误差平方和,来拟合一条最佳的直线。" advantages: @@ -310,15 +311,16 @@ clustering_algorithms: - "初始化仍然可能影响最终结果" applicable_scenarios: "适用于K均值聚类方法,并且希望改进初始中心选择的场景。" - HierarchicalKMeans: - principle: "层次化K均值结合了层次聚类和K均值聚类的方法,逐步将样本合并到已有簇中,形成层次化结构。" - advantages: - - "能够自动确定簇的数量" - - "生成的树状图有助于理解数据结构" - disadvantages: - - "计算量大,尤其是在数据量较大时" - - "对噪声和离群点敏感" - applicable_scenarios: "适用于不确定簇的数量且数据结构较复杂的情况。" + # 没实现这个方法 + # HierarchicalKMeans: + # principle: "层次化K均值结合了层次聚类和K均值聚类的方法,逐步将样本合并到已有簇中,形成层次化结构。" + # advantages: + # - "能够自动确定簇的数量" + # - "生成的树状图有助于理解数据结构" + # disadvantages: + # - "计算量大,尤其是在数据量较大时" + # - "对噪声和离群点敏感" + # applicable_scenarios: "适用于不确定簇的数量且数据结构较复杂的情况。" FCM: principle: "模糊C均值(FCM)允许每个数据点属于多个簇,基于隶属度来进行聚类。" diff --git a/model/parameter.yaml b/model/parameter.yaml index 11a13bc..8bef8e4 100644 --- a/model/parameter.yaml +++ b/model/parameter.yaml @@ -202,23 +202,23 @@ classification_algorithms: regression_algorithms: LinearRegression: - parameters: - - name: "fit_intercept" - type: "bool" - default: "True" - description: "是否计算截距。" - - name: "normalize" - type: "bool" - default: "False" - description: "是否对数据进行归一化处理。(已弃用)" - - name: "copy_X" - type: "bool" - default: "True" - description: "是否复制输入数据。" - - name: "n_jobs" - type: "int" - default: "None" - description: "用于计算的并行作业数。" + parameters: + - name: "fit_intercept" + type: "bool" + default: "True" + description: "是否计算截距。" + - name: "normalize" + type: "bool" + default: "False" + description: "是否对数据进行归一化处理。(已弃用)" + - name: "copy_X" + type: "bool" + default: "True" + description: "是否复制输入数据。" + - name: "n_jobs" + type: "int" + default: "None" + description: "用于计算的并行作业数。" PolynomialRegression: parameters: