完成--完成获取已处理好的数据集方法
This commit is contained in:
parent
d0ad80fadd
commit
837ccc386a
@ -188,28 +188,85 @@ Response:
|
||||
{
|
||||
"status": "success",
|
||||
"datasets": [
|
||||
{
|
||||
"id": "dataset_001",
|
||||
"name": "breast_cancer",
|
||||
"created_at": "2023-08-20T10:00:00",
|
||||
"files": {
|
||||
"raw": "dataset/dataset_raw/breast_cancer.csv",
|
||||
"processed": {
|
||||
"train": "dataset/dataset_processed/breast_cancer_train.csv",
|
||||
"val": "dataset/dataset_processed/breast_cancer_val.csv",
|
||||
"test": "dataset/dataset_processed/breast_cancer_test.csv"
|
||||
}
|
||||
},
|
||||
"process_history": [
|
||||
{
|
||||
"step": "missing_value_handler",
|
||||
"params": {"strategy": "mean"}
|
||||
},
|
||||
{
|
||||
"step": "standard_scaler",
|
||||
"params": {"with_mean": true}
|
||||
}
|
||||
"input_file": "dataset/dataset_raw/breast_cancer.csv",
|
||||
"timestamp": "2025-02-18T09:48:48.983863",
|
||||
"process_methods": [
|
||||
{
|
||||
"method_name": "SimpleImputer",
|
||||
"params": {
|
||||
"strategy": "mean",
|
||||
"missing_values": NaN
|
||||
}
|
||||
},
|
||||
{
|
||||
"method_name": "IsolationForest",
|
||||
"params": {
|
||||
"contamination": 0.1,
|
||||
"random_state": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"method_name": "StandardScaler",
|
||||
"params": {
|
||||
"with_mean": true,
|
||||
"with_std": true
|
||||
}
|
||||
}
|
||||
],
|
||||
"feature_methods": [],
|
||||
"split_params": {
|
||||
"test_size": 0.2,
|
||||
"val_size": 0.1
|
||||
},
|
||||
"steps": [
|
||||
{
|
||||
"step": "load_data",
|
||||
"shape": [
|
||||
569,
|
||||
31
|
||||
]
|
||||
},
|
||||
{
|
||||
"step": "cleaning",
|
||||
"method": "SimpleImputer",
|
||||
"params": {
|
||||
"strategy": "mean",
|
||||
"missing_values": NaN
|
||||
},
|
||||
"shape": [
|
||||
569,
|
||||
31
|
||||
]
|
||||
},
|
||||
{
|
||||
"step": "cleaning",
|
||||
"method": "IsolationForest",
|
||||
"params": {
|
||||
"contamination": 0.1,
|
||||
"random_state": 42
|
||||
},
|
||||
"shape": [
|
||||
512,
|
||||
31
|
||||
]
|
||||
},
|
||||
{
|
||||
"step": "cleaning",
|
||||
"method": "StandardScaler",
|
||||
"params": {
|
||||
"with_mean": true,
|
||||
"with_std": true
|
||||
},
|
||||
"shape": [
|
||||
512,
|
||||
31
|
||||
]
|
||||
}
|
||||
],
|
||||
"output_files": {
|
||||
"train": "dataset/dataset_processed/breast_cancer_20250218_094848/train_breast_cancer_20250218_094848.csv",
|
||||
"validation": "dataset/dataset_processed/breast_cancer_20250218_094848/val_breast_cancer_20250218_094848.csv",
|
||||
"test": "dataset/dataset_processed/breast_cancer_20250218_094848/test_breast_cancer_20250218_094848.csv"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
5
example_get_all_dataset.py
Normal file
5
example_get_all_dataset.py
Normal file
@ -0,0 +1,5 @@
|
||||
from function.get_all_dataset import DatasetHistory
|
||||
|
||||
|
||||
# Demo: print the processing-history record of every processed dataset.
print(DatasetHistory().get_dataset())
|
||||
BIN
function/__pycache__/get_all_dataset.cpython-39.pyc
Normal file
BIN
function/__pycache__/get_all_dataset.cpython-39.pyc
Normal file
Binary file not shown.
@ -1,4 +1,33 @@
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Query the available datasets; processed datasets are stored under dataset/dataset_processed/ by default
|
||||
# The JSON file inside each subfolder records how that dataset was processed
|
||||
# The JSON file inside each subfolder records how that dataset was processed
|
||||
|
||||
class DatasetHistory:
    """Collect the processing-history records of already-processed datasets.

    Each immediate subfolder of the processed-dataset root is searched for
    ``*.json`` files, and the parsed content of every such file is returned.
    """

    def __init__(self, dataset_processed_path='dataset/dataset_processed') -> None:
        """Store the folder that will be scanned.

        Args:
            dataset_processed_path: Root folder whose subfolders hold the
                JSON record files. Defaults to ``dataset/dataset_processed``,
                resolved relative to the current working directory.
        """
        self.dataset_processed_path = dataset_processed_path

    def get_dataset(self):
        """Load every JSON record found one level below the root folder.

        Returns:
            A list holding the parsed content of each ``*.json`` file,
            ordered by subfolder name and then by file name so repeated
            calls give the same order. The list is empty when the root
            folder does not exist or contains no records.

        Raises:
            json.JSONDecodeError: If a record file is not valid JSON.
        """
        root = Path(self.dataset_processed_path)

        # Nothing has been processed yet: report "no datasets" instead of
        # failing with FileNotFoundError.
        if not root.is_dir():
            return []

        records = []
        for folder in sorted(root.iterdir()):
            # Records live inside per-dataset subfolders; skip stray files.
            if not folder.is_dir():
                continue

            for json_file in sorted(folder.glob('*.json')):
                with json_file.open('r', encoding='utf-8') as f:
                    records.append(json.load(f))

        return records
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user