# MLPlatform/model/parameter.yaml
#
# 651 lines
# 19 KiB
# YAML
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Hyper-parameters exposed for each classification algorithm.
# Layout: section -> algorithm name -> `parameters` -> list of entries, where
# every entry carries `name`, `type`, `default` and `description`.
# Every `default` is stored as a quoted string (including "None" and "True");
# presumably the consumer casts it according to `type` - verify against the
# loader before switching any of them to native YAML scalars.
classification_algorithms:
  LogisticRegression:
    parameters:
      # NOTE(review): recent scikit-learn releases expect None rather than the
      # string 'none' for "no penalty" - confirm against the pinned version.
      - name: "penalty"
        type: "str"
        default: "l2"
        description: "用于正则化的惩罚项,可选 'l1', 'l2', 'elasticnet', 'none'。"
      - name: "C"
        type: "float"
        default: "1.0"
        description: "正则化强度的倒数,较小的值表示更强的正则化。"
      - name: "solver"
        type: "str"
        default: "lbfgs"
        description: "优化算法,可选 'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'。"
      - name: "max_iter"
        type: "int"
        default: "100"
        description: "最大迭代次数,控制优化收敛速度。"

  SVC:
    parameters:
      - name: "C"
        type: "float"
        default: "1.0"
        description: "正则化参数,控制决策边界的松弛程度。"
      - name: "kernel"
        type: "str"
        default: "rbf"
        description: "核函数类型,可选 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'。"
      - name: "degree"
        type: "int"
        default: "3"
        description: "多项式核函数的维度,仅当 kernel='poly' 时有效。"
      # Declared as "str" although the description also allows a float.
      - name: "gamma"
        type: "str"
        default: "scale"
        description: "核函数系数,可选 'scale', 'auto' 或浮点数。"
      - name: "max_iter"
        type: "int"
        default: "-1"
        description: "最大迭代次数,-1 表示无限制。"

  DecisionTreeClassifier:
    parameters:
      - name: "criterion"
        type: "str"
        default: "gini"
        description: "用于划分的准则,可选 'gini' 或 'entropy'。"
      # Default is the string "None" on an "int" parameter (meaning: no limit).
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度,None 表示不限制。"
      - name: "min_samples_split"
        type: "int"
        default: "2"
        description: "内部节点再划分所需的最小样本数。"
      - name: "min_samples_leaf"
        type: "int"
        default: "1"
        description: "叶子节点包含的最小样本数。"

  RandomForestClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "森林中树的数量。"
      - name: "criterion"
        type: "str"
        default: "gini"
        description: "用于划分的准则,可选 'gini' 或 'entropy'。"
      # Default is the string "None" on an "int" parameter (meaning: no limit).
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度,None 表示不限制。"
      - name: "bootstrap"
        type: "bool"
        default: "True"
        description: "是否进行自助采样。"

  # NOTE(review): learning_rate "0.1" here may not match the library's own
  # default, and max_depth "6" differs from the "3" listed for XGBRegressor in
  # this file - confirm both against the installed XGBoost version.
  XGBClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率,控制每棵树的贡献程度。"
      - name: "max_depth"
        type: "int"
        default: "6"
        description: "每棵树的最大深度。"
      - name: "gamma"
        type: "float"
        default: "0"
        description: "节点分裂所需的最小损失减少量。"

  AdaBoostClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "50"
        description: "弱分类器的数量。"
      - name: "learning_rate"
        type: "float"
        default: "1.0"
        description: "更新权重时的缩放因子。"
      # NOTE(review): 'SAMME.R' is reported as deprecated in newer scikit-learn
      # releases - confirm it is still accepted by the pinned version.
      - name: "algorithm"
        type: "str"
        default: "SAMME.R"
        description: "提升算法,可选 'SAMME' 或 'SAMME.R'。"

  CatBoostClassifier:
    parameters:
      - name: "iterations"
        type: "int"
        default: "1000"
        description: "训练的迭代次数。"
      - name: "learning_rate"
        type: "float"
        default: "0.03"
        description: "学习率,控制更新步长。"
      - name: "depth"
        type: "int"
        default: "6"
        description: "树的深度。"

  LGBMClassifier:
    parameters:
      - name: "num_leaves"
        type: "int"
        default: "31"
        description: "树的最大叶子数。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"

  GaussianNB:
    parameters:
      - name: "var_smoothing"
        type: "float"
        default: "1e-9"
        description: "加在方差上的小数,防止零方差问题。"

  KNeighborsClassifier:
    parameters:
      - name: "n_neighbors"
        type: "int"
        default: "5"
        description: "最近邻的数量。"
      - name: "weights"
        type: "str"
        default: "uniform"
        description: "权重分配策略,可选 'uniform' 或 'distance'。"
      - name: "algorithm"
        type: "str"
        default: "auto"
        description: "搜索最近邻的算法。"

  MLPClassifier:
    parameters:
      # Tuple written as a string literal; the consumer has to parse it.
      - name: "hidden_layer_sizes"
        type: "tuple"
        default: "(100,)"
        description: "隐藏层的神经元数量。"
      - name: "activation"
        type: "str"
        default: "relu"
        description: "激活函数,可选 'identity', 'logistic', 'tanh', 'relu'。"
      - name: "solver"
        type: "str"
        default: "adam"
        description: "优化算法,可选 'lbfgs', 'sgd', 'adam'。"
      - name: "max_iter"
        type: "int"
        default: "200"
        description: "最大迭代次数。"

  GradientBoostingClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "max_depth"
        type: "int"
        default: "3"
        description: "每棵树的最大深度。"
# Hyper-parameters exposed for each regression algorithm.
# Layout: section -> algorithm name -> `parameters` -> list of entries, where
# every entry carries `name`, `type`, `default` and `description`.
# Every `default` is stored as a quoted string (including "None" and "True");
# presumably the consumer casts it according to `type` - verify against the
# loader before switching any of them to native YAML scalars.
regression_algorithms:
  LinearRegression:
    parameters:
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      # Already marked deprecated in its own description.
      # NOTE(review): newer scikit-learn releases may reject this argument
      # entirely - confirm against the pinned version.
      - name: "normalize"
        type: "bool"
        default: "False"
        description: "是否对数据进行归一化处理。(已弃用)"
      - name: "copy_X"
        type: "bool"
        default: "True"
        description: "是否复制输入数据。"
      - name: "n_jobs"
        type: "int"
        default: "None"
        description: "用于计算的并行作业数。"

  # NOTE(review): this name does not look like a scikit-learn estimator; the
  # parameters resemble those of PolynomialFeatures - confirm how the platform
  # maps this entry to an implementation.
  PolynomialRegression:
    parameters:
      - name: "degree"
        type: "int"
        default: "2"
        description: "多项式的最高次数。"
      - name: "interaction_only"
        type: "bool"
        default: "False"
        description: "是否仅考虑特征之间的交互项。"
      - name: "include_bias"
        type: "bool"
        default: "True"
        description: "是否包含偏置项。"

  Ridge:
    parameters:
      - name: "alpha"
        type: "float"
        default: "1.0"
        description: "正则化力度。"
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      - name: "max_iter"
        type: "int"
        default: "None"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "0.001"
        description: "容忍误差。"
      - name: "solver"
        type: "str"
        default: "auto"
        description: "求解器选择。"

  Lasso:
    parameters:
      - name: "alpha"
        type: "float"
        default: "1.0"
        description: "正则化参数。"
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      - name: "max_iter"
        type: "int"
        default: "1000"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "0.0001"
        description: "收敛容忍度。"
      - name: "selection"
        type: "str"
        default: "cyclic"
        description: "特征选择方式,可选 'cyclic' 或 'random'。"

  ElasticNet:
    parameters:
      - name: "alpha"
        type: "float"
        default: "1.0"
        description: "正则化参数。"
      - name: "l1_ratio"
        type: "float"
        default: "0.5"
        description: "L1正则化比例,控制L1和L2的混合比例。"
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      - name: "max_iter"
        type: "int"
        default: "1000"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "0.0001"
        description: "收敛容忍度。"

  SVR:
    parameters:
      - name: "kernel"
        type: "str"
        default: "rbf"
        description: "核函数类型,可选 'linear', 'poly', 'rbf', 'sigmoid'。"
      - name: "C"
        type: "float"
        default: "1.0"
        description: "惩罚参数。"
      - name: "epsilon"
        type: "float"
        default: "0.1"
        description: "epsilon不敏感损失中的参数。"
      - name: "degree"
        type: "int"
        default: "3"
        description: "多项式核函数的度数,仅当 kernel='poly' 时有效。"
      - name: "gamma"
        type: "str"
        default: "scale"
        description: "核函数系数。"

  DecisionTreeRegressor:
    parameters:
      - name: "criterion"
        type: "str"
        default: "squared_error"
        description: "衡量分裂质量的指标。"
      - name: "splitter"
        type: "str"
        default: "best"
        description: "划分策略,可选 'best' 或 'random'。"
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度。"
      # "int/float" is a compound type label; DecisionTreeClassifier in this
      # file declares the same parameter as plain "int".
      - name: "min_samples_split"
        type: "int/float"
        default: "2"
        description: "内部节点再划分所需的最小样本数。"
      - name: "min_samples_leaf"
        type: "int/float"
        default: "1"
        description: "叶子节点最少样本数。"

  RandomForestRegressor:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "森林中树的数量。"
      - name: "criterion"
        type: "str"
        default: "squared_error"
        description: "衡量分裂质量的指标。"
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度。"
      - name: "min_samples_split"
        type: "int"
        default: "2"
        description: "内部节点再划分所需的最小样本数。"
      - name: "n_jobs"
        type: "int"
        default: "None"
        description: "用于计算的并行作业数。"

  # NOTE(review): max_depth "3" differs from the "6" listed for XGBClassifier
  # in this file - confirm which one matches the installed XGBoost version.
  XGBRegressor:
    parameters:
      - name: "max_depth"
        type: "int"
        default: "3"
        description: "树的最大深度。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "objective"
        type: "str"
        default: "reg:squarederror"
        description: "损失函数。"
      - name: "subsample"
        type: "float"
        default: "1"
        description: "采样比例。"

  AdaBoostRegressor:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "50"
        description: "基学习器的数量。"
      - name: "learning_rate"
        type: "float"
        default: "1.0"
        description: "学习率。"
      - name: "loss"
        type: "str"
        default: "linear"
        description: "损失函数类型,可选 'linear', 'square', 'exponential'。"

  CatBoostRegressor:
    parameters:
      - name: "iterations"
        type: "int"
        default: "1000"
        description: "迭代次数。"
      - name: "learning_rate"
        type: "float"
        default: "0.03"
        description: "学习率。"
      - name: "depth"
        type: "int"
        default: "6"
        description: "树的深度。"
      - name: "l2_leaf_reg"
        type: "float"
        default: "3.0"
        description: "L2正则化系数。"
      - name: "loss_function"
        type: "str"
        default: "RMSE"
        description: "损失函数。"

  LGBMRegressor:
    parameters:
      - name: "num_leaves"
        type: "int"
        default: "31"
        description: "叶子节点数量。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "objective"
        type: "str"
        default: "regression"
        description: "目标函数。"
      - name: "subsample"
        type: "float"
        default: "1.0"
        description: "采样比例。"

  MLPRegressor:
    parameters:
      # Tuple written as a string literal; the consumer has to parse it.
      - name: "hidden_layer_sizes"
        type: "tuple"
        default: "(100,)"
        description: "隐藏层的神经元数量和层数。"
      - name: "activation"
        type: "str"
        default: "relu"
        description: "激活函数,可选 'identity', 'logistic', 'tanh', 'relu'。"
      - name: "solver"
        type: "str"
        default: "adam"
        description: "权重优化算法。"
      - name: "alpha"
        type: "float"
        default: "0.0001"
        description: "L2正则化参数。"
      - name: "max_iter"
        type: "int"
        default: "200"
        description: "最大迭代次数。"
# Hyper-parameters exposed for each clustering algorithm.
# Layout: section -> algorithm name -> `parameters` -> list of entries, where
# every entry carries `name`, `type`, `default` and `description`.
# Every `default` is stored as a quoted string (including "None" and "True");
# presumably the consumer casts it according to `type` - verify against the
# loader before switching any of them to native YAML scalars.
clustering_algorithms:
  KMeans:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "8"
        description: "簇的数量,K均值聚类的核心参数。"
      - name: "init"
        type: "str"
        default: "k-means++"
        description: "初始化方法,可选 'k-means++'、'random' 或数组类型。"
      - name: "n_init"
        type: "int"
        default: "10"
        description: "初始化次数,K均值算法会运行该次数,选择最优结果。"
      - name: "max_iter"
        type: "int"
        default: "300"
        description: "每次运行的最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "1e-4"
        description: "算法收敛的容忍误差,当目标函数变化小于此值时,认为算法收敛。"
      # NOTE(review): this argument appears to have been removed from newer
      # scikit-learn releases - confirm it is still accepted by the pinned
      # version before passing it through.
      - name: "precompute_distances"
        type: "bool"
        default: "True"
        description: "是否预计算距离矩阵(会增加计算量)。"

  # NOTE(review): not a scikit-learn class name; presumably a platform-side
  # alias for KMeans with 'k-means++' initialisation - confirm with the loader.
  KMeansPlusPlus:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "8"
        description: "簇的数量。"
      - name: "n_init"
        type: "int"
        default: "10"
        description: "初始化次数。"
      - name: "max_iter"
        type: "int"
        default: "300"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "1e-4"
        description: "容忍误差,用于判断算法是否收敛。"

  AgglomerativeClustering:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "2"
        description: "簇的数量。"
      # NOTE(review): newer scikit-learn releases reportedly rename this
      # argument to `metric` - confirm against the pinned version.
      - name: "affinity"
        type: "str"
        default: "euclidean"
        description: "衡量样本间距离的方式,可选 'euclidean'、'manhattan'、'cosine'。"
      - name: "memory"
        type: "str/None"
        default: "None"
        description: "缓存路径,存储树状结构。"
      - name: "linkage"
        type: "str"
        default: "ward"
        description: "聚类方式,可选 'ward'(最小化平方误差)、'average'、'complete'。"
      # Declared as "str" so that 'auto' fits next to 'True' / 'False'.
      - name: "compute_full_tree"
        type: "str"
        default: "auto"
        description: "是否计算完全树结构,'auto'为自动,'True'为计算,'False'为不计算。"

  FCM:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "2"
        description: "簇的数量。"
      - name: "m"
        type: "float"
        default: "2.0"
        description: "模糊指数,控制隶属度的模糊程度,通常设置为2。"
      - name: "max_iter"
        type: "int"
        default: "100"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "1e-5"
        description: "收敛容忍度,当隶属度变化小于此值时,认为算法收敛。"
      - name: "random_state"
        type: "int"
        default: "None"
        description: "随机种子,用于初始化隶属度矩阵。"

  DBSCAN:
    parameters:
      - name: "eps"
        type: "float"
        default: "0.5"
        description: "邻域的最大距离,决定样本是否在同一簇中。"
      - name: "min_samples"
        type: "int"
        default: "5"
        description: "形成簇的最小样本数。"
      - name: "metric"
        type: "str"
        default: "euclidean"
        description: "计算距离的方式,可选 'euclidean', 'manhattan', 'cosine' 等。"
      - name: "algorithm"
        type: "str"
        default: "auto"
        description: "计算最近邻的方法,可选 'auto', 'ball_tree', 'kd_tree', 'brute'。"
      - name: "leaf_size"
        type: "int"
        default: "30"
        description: "对于BallTree或KDTree的叶子大小,影响效率。"

  GaussianMixture:
    parameters:
      - name: "n_components"
        type: "int"
        default: "1"
        description: "高斯混合分布的组件数,表示簇的数量。"
      - name: "covariance_type"
        type: "str"
        default: "full"
        description: "协方差类型,可选 'full', 'tied', 'diag', 'spherical'。"
      - name: "tol"
        type: "float"
        default: "1e-3"
        description: "收敛容忍度,当对数似然变化小于此值时停止算法。"
      - name: "max_iter"
        type: "int"
        default: "100"
        description: "最大迭代次数。"
      - name: "random_state"
        type: "int"
        default: "None"
        description: "随机种子,用于初始化高斯混合模型的参数。"

  SpectralClustering:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "8"
        description: "簇的数量。"
      - name: "affinity"
        type: "str"
        default: "rbf"
        description: "计算相似度的方式,可选 'rbf'、'nearest_neighbors'、'precomputed'。"
      - name: "n_neighbors"
        type: "int"
        default: "10"
        description: "对于 'nearest_neighbors' 相似度,使用的邻居数。"
      - name: "gamma"
        type: "float"
        default: "1.0"
        description: "用于计算径向基函数的gamma参数,影响相似度的计算。"
      - name: "eigen_solver"
        type: "str"
        default: "auto"
        description: "求解特征值的方式,可选 'arpack', 'lobpcg', 'auto'。"
      - name: "random_state"
        type: "int"
        default: "None"
        description: "随机种子,用于初始化谱聚类的计算。"