# Hyperparameter catalog for classification, regression and clustering algorithms.
---
# Hyperparameter catalog for classification algorithms.
# Layout: <algorithm name> -> parameters -> list of entries, each with the
# keys name, type, default and description.
# Every value, including numeric, boolean and None defaults, is stored as a
# quoted string; presumably the consumer converts `default` according to
# `type` - TODO confirm before changing any quoting.
classification_algorithms:
  LogisticRegression:
    parameters:
      - name: "penalty"
        type: "str"
        default: "l2"
        description: "用于正则化的惩罚项,可选 'l1', 'l2', 'elasticnet', 'none'。"
      - name: "C"
        type: "float"
        default: "1.0"
        description: "正则化强度的倒数,较小的值表示更强的正则化。"
      - name: "solver"
        type: "str"
        default: "lbfgs"
        description: "优化算法,可选 'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'。"
      - name: "max_iter"
        type: "int"
        default: "100"
        description: "最大迭代次数,控制优化收敛速度。"

  SVC:
    parameters:
      - name: "C"
        type: "float"
        default: "1.0"
        description: "正则化参数,控制决策边界的松弛程度。"
      - name: "kernel"
        type: "str"
        default: "rbf"
        description: "核函数类型,可选 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'。"
      # NOTE(review): this description says "维度" (dimension), while the SVR
      # entry describes the same parameter as "度数" (degree) - confirm wording.
      - name: "degree"
        type: "int"
        default: "3"
        description: "多项式核函数的维度,仅当 kernel='poly' 时有效。"
      # The declared type is "str" although the description also allows a
      # float value.
      - name: "gamma"
        type: "str"
        default: "scale"
        description: "核函数系数,可选 'scale', 'auto' 或浮点数。"
      - name: "max_iter"
        type: "int"
        default: "-1"
        description: "最大迭代次数,-1 表示无限制。"

  DecisionTreeClassifier:
    parameters:
      - name: "criterion"
        type: "str"
        default: "gini"
        description: "用于划分的准则,可选 'gini' 或 'entropy'。"
      # The default is the string "None" under type "int"; the consumer must
      # map it to a null value itself.
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度,None 表示不限制。"
      - name: "min_samples_split"
        type: "int"
        default: "2"
        description: "内部节点再划分所需的最小样本数。"
      - name: "min_samples_leaf"
        type: "int"
        default: "1"
        description: "叶子节点包含的最小样本数。"

  RandomForestClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "森林中树的数量。"
      - name: "criterion"
        type: "str"
        default: "gini"
        description: "用于划分的准则,可选 'gini' 或 'entropy'。"
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度,None 表示不限制。"
      - name: "bootstrap"
        type: "bool"
        default: "True"
        description: "是否进行自助采样。"

  # NOTE(review): max_depth defaults to "6" here but to "3" in the
  # XGBRegressor entry - confirm which xgboost version these mirror.
  XGBClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率,控制每棵树的贡献程度。"
      - name: "max_depth"
        type: "int"
        default: "6"
        description: "每棵树的最大深度。"
      - name: "gamma"
        type: "float"
        default: "0"
        description: "节点分裂所需的最小损失减少量。"

  AdaBoostClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "50"
        description: "弱分类器的数量。"
      - name: "learning_rate"
        type: "float"
        default: "1.0"
        description: "更新权重时的缩放因子。"
      # NOTE(review): scikit-learn deprecated "SAMME.R" in 1.4 and announced
      # its removal for 1.6 - verify this default against the pinned version.
      - name: "algorithm"
        type: "str"
        default: "SAMME.R"
        description: "提升算法,可选 'SAMME' 或 'SAMME.R'。"

  CatBoostClassifier:
    parameters:
      - name: "iterations"
        type: "int"
        default: "1000"
        description: "训练的迭代次数。"
      - name: "learning_rate"
        type: "float"
        default: "0.03"
        description: "学习率,控制更新步长。"
      - name: "depth"
        type: "int"
        default: "6"
        description: "树的深度。"

  LGBMClassifier:
    parameters:
      - name: "num_leaves"
        type: "int"
        default: "31"
        description: "树的最大叶子数。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"

  GaussianNB:
    parameters:
      - name: "var_smoothing"
        type: "float"
        default: "1e-9"
        description: "加在方差上的小数,防止零方差问题。"

  KNeighborsClassifier:
    parameters:
      - name: "n_neighbors"
        type: "int"
        default: "5"
        description: "最近邻的数量。"
      - name: "weights"
        type: "str"
        default: "uniform"
        description: "权重分配策略,可选 'uniform' 或 'distance'。"
      - name: "algorithm"
        type: "str"
        default: "auto"
        description: "搜索最近邻的算法。"

  MLPClassifier:
    parameters:
      # The default is a Python tuple literal kept as a string.
      - name: "hidden_layer_sizes"
        type: "tuple"
        default: "(100,)"
        description: "隐藏层的神经元数量。"
      - name: "activation"
        type: "str"
        default: "relu"
        description: "激活函数,可选 'identity', 'logistic', 'tanh', 'relu'。"
      - name: "solver"
        type: "str"
        default: "adam"
        description: "优化算法,可选 'lbfgs', 'sgd', 'adam'。"
      - name: "max_iter"
        type: "int"
        default: "200"
        description: "最大迭代次数。"

  GradientBoostingClassifier:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "max_depth"
        type: "int"
        default: "3"
        description: "每棵树的最大深度。"

# Hyperparameter catalog for regression algorithms.
# Layout: <algorithm name> -> parameters -> list of entries, each with the
# keys name, type, default and description.
# Every value, including numeric, boolean and None defaults, is stored as a
# quoted string; presumably the consumer converts `default` according to
# `type` - TODO confirm before changing any quoting.
regression_algorithms:
  LinearRegression:
    parameters:
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      # NOTE(review): the description already marks this parameter as
      # deprecated; scikit-learn removed `normalize` from LinearRegression in
      # 1.2, so passing it to newer versions fails - confirm the pinned version.
      - name: "normalize"
        type: "bool"
        default: "False"
        description: "是否对数据进行归一化处理。(已弃用)"
      - name: "copy_X"
        type: "bool"
        default: "True"
        description: "是否复制输入数据。"
      # The default is the string "None" under type "int"; the consumer must
      # map it to a null value itself.
      - name: "n_jobs"
        type: "int"
        default: "None"
        description: "用于计算的并行作业数。"

  # NOTE(review): "PolynomialRegression" is not a scikit-learn estimator name;
  # the parameters match sklearn.preprocessing.PolynomialFeatures, so the
  # consumer presumably builds a pipeline - confirm against the caller.
  PolynomialRegression:
    parameters:
      - name: "degree"
        type: "int"
        default: "2"
        description: "多项式的最高次数。"
      - name: "interaction_only"
        type: "bool"
        default: "False"
        description: "是否仅考虑特征之间的交互项。"
      - name: "include_bias"
        type: "bool"
        default: "True"
        description: "是否包含偏置项。"

  Ridge:
    parameters:
      - name: "alpha"
        type: "float"
        default: "1.0"
        description: "正则化力度。"
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      - name: "max_iter"
        type: "int"
        default: "None"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "0.001"
        description: "容忍误差。"
      - name: "solver"
        type: "str"
        default: "auto"
        description: "求解器选择。"

  Lasso:
    parameters:
      - name: "alpha"
        type: "float"
        default: "1.0"
        description: "正则化参数。"
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      - name: "max_iter"
        type: "int"
        default: "1000"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "0.0001"
        description: "收敛容忍度。"
      - name: "selection"
        type: "str"
        default: "cyclic"
        description: "特征选择方式,可选 'cyclic' 或 'random'。"

  ElasticNet:
    parameters:
      - name: "alpha"
        type: "float"
        default: "1.0"
        description: "正则化参数。"
      - name: "l1_ratio"
        type: "float"
        default: "0.5"
        description: "L1正则化比例,控制L1和L2的混合比例。"
      - name: "fit_intercept"
        type: "bool"
        default: "True"
        description: "是否计算截距。"
      - name: "max_iter"
        type: "int"
        default: "1000"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "0.0001"
        description: "收敛容忍度。"

  SVR:
    parameters:
      - name: "kernel"
        type: "str"
        default: "rbf"
        description: "核函数类型,可选 'linear', 'poly', 'rbf', 'sigmoid'。"
      - name: "C"
        type: "float"
        default: "1.0"
        description: "惩罚参数。"
      - name: "epsilon"
        type: "float"
        default: "0.1"
        description: "epsilon不敏感损失中的参数。"
      - name: "degree"
        type: "int"
        default: "3"
        description: "多项式核函数的度数,仅当 kernel='poly' 时有效。"
      - name: "gamma"
        type: "str"
        default: "scale"
        description: "核函数系数。"

  DecisionTreeRegressor:
    parameters:
      - name: "criterion"
        type: "str"
        default: "squared_error"
        description: "衡量分裂质量的指标。"
      - name: "splitter"
        type: "str"
        default: "best"
        description: "划分策略,可选 'best' 或 'random'。"
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度。"
      # "int/float" is a compound type label used only by this algorithm's
      # entries; the consumer must handle it explicitly.
      - name: "min_samples_split"
        type: "int/float"
        default: "2"
        description: "内部节点再划分所需的最小样本数。"
      - name: "min_samples_leaf"
        type: "int/float"
        default: "1"
        description: "叶子节点最少样本数。"

  RandomForestRegressor:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "森林中树的数量。"
      - name: "criterion"
        type: "str"
        default: "squared_error"
        description: "衡量分裂质量的指标。"
      - name: "max_depth"
        type: "int"
        default: "None"
        description: "树的最大深度。"
      - name: "min_samples_split"
        type: "int"
        default: "2"
        description: "内部节点再划分所需的最小样本数。"
      - name: "n_jobs"
        type: "int"
        default: "None"
        description: "用于计算的并行作业数。"

  # NOTE(review): max_depth defaults to "3" here but to "6" in the
  # XGBClassifier entry - confirm which xgboost version these mirror.
  XGBRegressor:
    parameters:
      - name: "max_depth"
        type: "int"
        default: "3"
        description: "树的最大深度。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "objective"
        type: "str"
        default: "reg:squarederror"
        description: "损失函数。"
      - name: "subsample"
        type: "float"
        default: "1"
        description: "采样比例。"

  AdaBoostRegressor:
    parameters:
      - name: "n_estimators"
        type: "int"
        default: "50"
        description: "基学习器的数量。"
      - name: "learning_rate"
        type: "float"
        default: "1.0"
        description: "学习率。"
      - name: "loss"
        type: "str"
        default: "linear"
        description: "损失函数类型,可选 'linear', 'square', 'exponential'。"

  CatBoostRegressor:
    parameters:
      - name: "iterations"
        type: "int"
        default: "1000"
        description: "迭代次数。"
      - name: "learning_rate"
        type: "float"
        default: "0.03"
        description: "学习率。"
      - name: "depth"
        type: "int"
        default: "6"
        description: "树的深度。"
      - name: "l2_leaf_reg"
        type: "float"
        default: "3.0"
        description: "L2正则化系数。"
      - name: "loss_function"
        type: "str"
        default: "RMSE"
        description: "损失函数。"

  LGBMRegressor:
    parameters:
      - name: "num_leaves"
        type: "int"
        default: "31"
        description: "叶子节点数量。"
      - name: "learning_rate"
        type: "float"
        default: "0.1"
        description: "学习率。"
      - name: "n_estimators"
        type: "int"
        default: "100"
        description: "树的数量。"
      - name: "objective"
        type: "str"
        default: "regression"
        description: "目标函数。"
      - name: "subsample"
        type: "float"
        default: "1.0"
        description: "采样比例。"

  MLPRegressor:
    parameters:
      # The default is a Python tuple literal kept as a string.
      - name: "hidden_layer_sizes"
        type: "tuple"
        default: "(100,)"
        description: "隐藏层的神经元数量和层数。"
      - name: "activation"
        type: "str"
        default: "relu"
        description: "激活函数,可选 'identity', 'logistic', 'tanh', 'relu'。"
      - name: "solver"
        type: "str"
        default: "adam"
        description: "权重优化算法。"
      - name: "alpha"
        type: "float"
        default: "0.0001"
        description: "L2正则化参数。"
      - name: "max_iter"
        type: "int"
        default: "200"
        description: "最大迭代次数。"

# Hyperparameter catalog for clustering algorithms.
# Layout: <algorithm name> -> parameters -> list of entries, each with the
# keys name, type, default and description.
# Every value, including numeric, boolean and None defaults, is stored as a
# quoted string; presumably the consumer converts `default` according to
# `type` - TODO confirm before changing any quoting.
clustering_algorithms:
  KMeans:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "8"
        description: "簇的数量,K均值聚类的核心参数。"
      # The declared type is "str" although the description also allows an
      # array value.
      - name: "init"
        type: "str"
        default: "k-means++"
        description: "初始化方法,可选 'k-means++'、'random' 或数组类型。"
      - name: "n_init"
        type: "int"
        default: "10"
        description: "初始化次数,K均值算法会运行该次数,选择最优结果。"
      - name: "max_iter"
        type: "int"
        default: "300"
        description: "每次运行的最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "1e-4"
        description: "算法收敛的容忍误差,当目标函数变化小于此值时,认为算法收敛。"
      # NOTE(review): scikit-learn deprecated `precompute_distances` in 0.23
      # and removed it in 1.0; passing it to newer versions fails - confirm
      # the pinned version before exposing this parameter.
      - name: "precompute_distances"
        type: "bool"
        default: "True"
        description: "是否预计算距离矩阵(会增加计算量)。"

  # NOTE(review): "KMeansPlusPlus" is not a scikit-learn estimator name;
  # presumably the consumer maps it to KMeans with init="k-means++" - confirm.
  KMeansPlusPlus:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "8"
        description: "簇的数量。"
      - name: "n_init"
        type: "int"
        default: "10"
        description: "初始化次数。"
      - name: "max_iter"
        type: "int"
        default: "300"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "1e-4"
        description: "容忍误差,用于判断算法是否收敛。"

  AgglomerativeClustering:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "2"
        description: "簇的数量。"
      # NOTE(review): scikit-learn renamed `affinity` to `metric` in 1.2 and
      # removed the old name in 1.4 - confirm the pinned version.
      - name: "affinity"
        type: "str"
        default: "euclidean"
        description: "衡量样本间距离的方式,可选 'euclidean'、'manhattan'、'cosine'。"
      # "str/None" is a compound type label; the default is the string "None".
      - name: "memory"
        type: "str/None"
        default: "None"
        description: "缓存路径,存储树状结构。"
      - name: "linkage"
        type: "str"
        default: "ward"
        description: "聚类方式,可选 'ward'(最小化平方误差)、'average'、'complete'。"
      # Typed "str" because the value is either 'auto' or a boolean spelled as
      # text, per the description.
      - name: "compute_full_tree"
        type: "str"
        default: "auto"
        description: "是否计算完全树结构,'auto'为自动,'True'为计算,'False'为不计算。"

  # NOTE(review): "FCM" is not a scikit-learn estimator; presumably a fuzzy
  # c-means implementation provided elsewhere - verify these parameter names
  # against that implementation.
  FCM:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "2"
        description: "簇的数量。"
      - name: "m"
        type: "float"
        default: "2.0"
        description: "模糊指数,控制隶属度的模糊程度,通常设置为2。"
      - name: "max_iter"
        type: "int"
        default: "100"
        description: "最大迭代次数。"
      - name: "tol"
        type: "float"
        default: "1e-5"
        description: "收敛容忍度,当隶属度变化小于此值时,认为算法收敛。"
      - name: "random_state"
        type: "int"
        default: "None"
        description: "随机种子,用于初始化隶属度矩阵。"

  DBSCAN:
    parameters:
      - name: "eps"
        type: "float"
        default: "0.5"
        description: "邻域的最大距离,决定样本是否在同一簇中。"
      - name: "min_samples"
        type: "int"
        default: "5"
        description: "形成簇的最小样本数。"
      - name: "metric"
        type: "str"
        default: "euclidean"
        description: "计算距离的方式,可选 'euclidean', 'manhattan', 'cosine' 等。"
      - name: "algorithm"
        type: "str"
        default: "auto"
        description: "计算最近邻的方法,可选 'auto', 'ball_tree', 'kd_tree', 'brute'。"
      - name: "leaf_size"
        type: "int"
        default: "30"
        description: "对于BallTree或KDTree的叶子大小,影响效率。"

  GaussianMixture:
    parameters:
      - name: "n_components"
        type: "int"
        default: "1"
        description: "高斯混合分布的组件数,表示簇的数量。"
      - name: "covariance_type"
        type: "str"
        default: "full"
        description: "协方差类型,可选 'full', 'tied', 'diag', 'spherical'。"
      - name: "tol"
        type: "float"
        default: "1e-3"
        description: "收敛容忍度,当对数似然变化小于此值时停止算法。"
      - name: "max_iter"
        type: "int"
        default: "100"
        description: "最大迭代次数。"
      - name: "random_state"
        type: "int"
        default: "None"
        description: "随机种子,用于初始化高斯混合模型的参数。"

  SpectralClustering:
    parameters:
      - name: "n_clusters"
        type: "int"
        default: "8"
        description: "簇的数量。"
      - name: "affinity"
        type: "str"
        default: "rbf"
        description: "计算相似度的方式,可选 'rbf'、'nearest_neighbors'、'precomputed'。"
      - name: "n_neighbors"
        type: "int"
        default: "10"
        description: "对于 'nearest_neighbors' 相似度,使用的邻居数。"
      - name: "gamma"
        type: "float"
        default: "1.0"
        description: "用于计算径向基函数的gamma参数,影响相似度的计算。"
      - name: "eigen_solver"
        type: "str"
        default: "auto"
        description: "求解特征值的方式,可选 'arpack', 'lobpcg', 'auto'。"
      - name: "random_state"
        type: "int"
        default: "None"
        description: "随机种子,用于初始化谱聚类的计算。"