# Source path: MLPlatform/date_feature/parameter.yaml
# Repository-viewer metadata: 2025-02-24 16:23:34 +08:00, 256 lines, 8.4 KiB, YAML
#
# Repository-viewer notice: this file contains Unicode characters that might
# be confused with other characters (presumably the full-width punctuation
# used inside the description strings). If this is intentional, the warning
# can be safely ignored.

# Catalogue of feature-engineering methods and their tunable parameters.
#
# Layout:
#   feature_engineering_methods:
#     <MethodName>:
#       description: human-readable summary of the method
#       parameters:  list of parameter specs (may be empty), each with
#         name:        parameter name
#         type:        free-text description of the accepted type(s)
#         default:     default value (null where no default is given)
#         description: human-readable explanation of the parameter
#
# NOTE(review): method and parameter names appear to mirror scikit-learn
# classes; the defaults should be checked against the scikit-learn version
# the platform actually pins.
# NOTE(review): defaults such as "float64" and "f_classif" are plain strings
# here; presumably the consumer resolves them to the real dtype / callable.
feature_engineering_methods:
  LabelEncoder:
    description: "将分类标签编码为整数。"
    parameters: []

  KBinsDiscretizer:
    description: "将连续数据分箱为离散数据。"
    parameters:
      - name: "n_bins"
        type: "int or array-like"
        default: 5
        description: "指定每个特征要分成的箱数。可以是单个整数,表示所有特征使用相同的箱数;也可以是形状为 (n_features,) 的数组,为每个特征指定不同的箱数。"
      - name: "encode"
        type: "str"
        default: "onehot"
        description: "指定离散化后输出的编码方式。可选值包括 'onehot'(独热编码)、'onehot-dense'(密集独热编码)、'ordinal'(序数编码)。"
      - name: "strategy"
        type: "str"
        default: "quantile"
        description: "定义分箱策略。可选值包括 'uniform'(均匀分箱)、'quantile'(分位数分箱)、'kmeans'(K-Means 聚类分箱)。"

  FunctionTransformer:
    description: "对数据应用自定义函数进行转换。"
    parameters:
      - name: "func"
        type: "callable"
        default: null
        description: "要应用于输入数据的函数。"
      - name: "inverse_func"
        type: "callable"
        default: null
        description: "func 的逆函数,如果存在。"
      - name: "validate"
        type: "bool"
        default: false
        description: "指示是否在转换前验证输入数据。"
      - name: "accept_sparse"
        type: "bool"
        default: false
        description: "指示是否接受稀疏矩阵作为输入。"
      - name: "check_inverse"
        type: "bool"
        default: true
        description: "指示在拟合期间是否检查 func 和 inverse_func 是否互为逆函数。"
      - name: "kw_args"
        type: "dict"
        default: null
        description: "传递给 func 的其他关键字参数。"
      - name: "inv_kw_args"
        type: "dict"
        default: null
        description: "传递给 inverse_func 的其他关键字参数。"

  PowerTransformer:
    description: "对数据进行幂变换以使其更符合正态分布。"
    parameters:
      - name: "method"
        type: "str"
        default: "yeo-johnson"
        description: "指定变换方法。可选值包括 'yeo-johnson' 和 'box-cox'。"
      - name: "standardize"
        type: "bool"
        default: true
        description: "指示是否在变换后将数据标准化为零均值和单位方差。"
      - name: "copy"
        type: "bool"
        default: true
        description: "指示是否复制输入数据,或在原地进行变换。"

  QuantileTransformer:
    description: "将数据转换为均匀分布或正态分布。"
    parameters:
      - name: "n_quantiles"
        type: "int"
        default: 1000
        description: "用于分位数变换的分位数数量。"
      - name: "output_distribution"
        type: "str"
        default: "uniform"
        description: "指定输出分布。可选值包括 'uniform' 和 'normal'。"
      - name: "ignore_implicit_zeros"
        type: "bool"
        default: false
        description: "指示是否忽略隐式零。"
      # NOTE(review): newer scikit-learn releases appear to use a smaller
      # default for subsample - confirm against the pinned version.
      - name: "subsample"
        type: "int"
        default: 100000
        description: "用于计算分位数的子样本大小。"
      - name: "random_state"
        type: "int or None"
        default: null
        description: "用于随机数生成的种子。"
      - name: "copy"
        type: "bool"
        default: true
        description: "指示是否复制输入数据,或在原地进行变换。"

  FeatureHasher:
    description: "使用哈希技巧将特征映射到向量。"
    parameters:
      # 1048576 == 2 ** 20
      - name: "n_features"
        type: "int"
        default: 1048576
        description: "哈希空间的维度。"
      - name: "input_type"
        type: "str"
        default: "dict"
        description: "输入数据的类型。可选值包括 'dict'、'pair' 和 'string'。"
      - name: "dtype"
        type: "type"
        default: "float64"
        description: "输出数据的类型。"
      - name: "alternate_sign"
        type: "bool"
        default: true
        description: "指示是否在哈希时使用交替符号。"

  DictVectorizer:
    description: "将符号表示的特征(如字典)转换为稀疏矩阵。"
    parameters:
      - name: "dtype"
        type: "type"
        default: "float64"
        description: "输出数据的类型。"
      - name: "separator"
        type: "str"
        default: "="
        description: "用于分隔特征名称的分隔符。"
      - name: "sparse"
        type: "bool"
        default: true
        description: "指示是否返回稀疏矩阵。"
      - name: "sort"
        type: "bool"
        default: true
        description: "指示是否对特征名称排序。"

  PCA:
    description: "主成分分析,用于降维。"
    parameters:
      - name: "n_components"
        type: "int, float, None or str"
        default: null
        description: "要保留的主成分数量。可以是整数、浮点数或 'mle'。"
      - name: "copy"
        type: "bool"
        default: true
        description: "指示是否复制输入数据,或在原地进行变换。"
      - name: "whiten"
        type: "bool"
        default: false
        description: "指示是否对主成分进行白化。"
      - name: "svd_solver"
        type: "str"
        default: "auto"
        description: "用于计算 SVD 的方法。可选值包括 'auto'、'full'、'arpack' 和 'randomized'。"
      - name: "tol"
        type: "float"
        default: 0.0
        description: "奇异值分解的容差。"
      - name: "iterated_power"
        type: "int or 'auto'"
        default: "auto"
        description: "用于随机化 SVD 的迭代次数。"
      - name: "random_state"
        type: "int or None"
        default: null
        description: "用于随机数生成的种子。"

  SelectKBest:
    description: "选择最重要的 K 个特征。"
    parameters:
      - name: "score_func"
        type: "callable"
        default: "f_classif"
        description: "用于计算特征得分的函数。"
      - name: "k"
        type: "int"
        default: 10
        description: "要选择的特征数量。"

  RFE:
    description: "递归特征消除,用于选择最重要的特征。"
    parameters:
      # NOTE(review): estimator has a null default here, but a base model
      # presumably has to be supplied by the caller - confirm.
      - name: "estimator"
        type: "object"
        default: null
        description: "用于特征选择的基模型。"
      - name: "n_features_to_select"
        type: "int"
        default: null
        description: "要选择的特征数量。"
      - name: "step"
        type: "int"
        default: 1
        description: "每次迭代要移除的特征数量。"
      - name: "verbose"
        type: "int"
        default: 0
        description: "控制冗长模式的整数。"

  PolynomialFeatures:
    description: "生成多项式特征,增加模型的非线性能力。"
    parameters:
      - name: "degree"
        type: "int"
        default: 2
        description: "生成多项式特征的最高次数。"
      - name: "interaction_only"
        type: "bool"
        default: false
        description: "指示是否仅包含交互项。"
      - name: "include_bias"
        type: "bool"
        default: true
        description: "指示是否包含偏置列。"
      - name: "order"
        type: "str"
        default: "C"
        description: "输出特征的顺序。可选值包括 'C' 和 'F'。"

  OneHotEncoder:
    description: "将分类特征转换为独热编码。"
    parameters:
      - name: "categories"
        type: "str or list or 'auto'"
        default: "auto"
        description: "指定每个特征的类别。"
      - name: "drop"
        type: "str or array-like"
        default: null
        description: "指定要从每个特征中删除的类别。"
      # NOTE(review): recent scikit-learn releases renamed this parameter to
      # sparse_output - confirm which name the pinned version accepts.
      - name: "sparse"
        type: "bool"
        default: true
        description: "指示是否返回稀疏矩阵。"
      - name: "dtype"
        type: "type"
        default: "float64"
        description: "输出数据的类型。"
      - name: "handle_unknown"
        type: "str"
        default: "error"
        description: "指定如何处理未知类别。可选值包括 'error'(抛出异常)、'ignore'(忽略)。"
      - name: "max_categories"
        type: "int or None"
        default: null
        description: "在类别过多时,将类别限制为最大类别数量。"