跳到内容

超参数优化 Facade

smac.facade.hyperparameter_optimization_facade #

HyperparameterOptimizationFacade #

HyperparameterOptimizationFacade(
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction
    | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer
    | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm
    | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder
    | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int
    | Path
    | Literal[False]
    | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None
)

基类: AbstractFacade

源代码位于 smac/facade/abstract_facade.py
def __init__(
    self,
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] | None = None,
    overwrite: bool = False,
    dask_client: Client | None = None,
):
    """Assemble all components of the facade and configure the optimizer.

    Every component that is not passed explicitly is created through the
    matching ``get_*`` factory of this facade, called with ``scenario``.

    Parameters
    ----------
    scenario : Scenario
        Scenario the facade is built for. Its configuration space is seeded
        with ``scenario.seed`` and its meta data is updated with ``self.meta``.
    target_function : Callable | str | AbstractRunner
        An ``AbstractRunner`` is used as is, a string is wrapped in a
        ``TargetFunctionScriptRunner`` and anything else is wrapped in a
        ``TargetFunctionRunner``.
    model, acquisition_function, acquisition_maximizer, initial_design, random_design, \
intensifier, runhistory_encoder, config_selector
        Optional components; ``None`` selects the facade's default.
    multi_objective_algorithm : AbstractMultiObjectiveAlgorithm | None, defaults to None
        A default is only created when the scenario has more than one objective.
    logging_level : int | Path | Literal[False] | None, defaults to None
        Forwarded to ``setup_logging``.
    callbacks : list[Callback] | None, defaults to None
        Callbacks registered on the optimizer; ``None`` means no callbacks.
    overwrite : bool, defaults to False
        Stored on the facade as ``_overwrite``.
    dask_client : Client | None, defaults to None
        When given, the runner is wrapped in a ``DaskParallelRunner`` using this client.
    """
    setup_logging(logging_level)

    # Use a fresh list per instance instead of a shared mutable default
    if callbacks is None:
        callbacks = []

    if model is None:
        model = self.get_model(scenario)

    if acquisition_function is None:
        acquisition_function = self.get_acquisition_function(scenario)

    if acquisition_maximizer is None:
        acquisition_maximizer = self.get_acquisition_maximizer(scenario)

    if initial_design is None:
        initial_design = self.get_initial_design(scenario)

    if random_design is None:
        random_design = self.get_random_design(scenario)

    if intensifier is None:
        intensifier = self.get_intensifier(scenario)

    # A multi-objective algorithm is only needed for more than one objective
    if multi_objective_algorithm is None and scenario.count_objectives() > 1:
        multi_objective_algorithm = self.get_multi_objective_algorithm(scenario=scenario)

    if runhistory_encoder is None:
        runhistory_encoder = self.get_runhistory_encoder(scenario)

    if config_selector is None:
        config_selector = self.get_config_selector(scenario)

    # Initialize empty stats and runhistory object
    runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm)

    # Set the seed for configuration space
    scenario.configspace.seed(scenario.seed)

    # Set variables globally
    self._scenario = scenario
    self._model = model
    self._acquisition_function = acquisition_function
    self._acquisition_maximizer = acquisition_maximizer
    self._initial_design = initial_design
    self._random_design = random_design
    self._intensifier = intensifier
    self._multi_objective_algorithm = multi_objective_algorithm
    self._runhistory = runhistory
    self._runhistory_encoder = runhistory_encoder
    self._config_selector = config_selector
    self._callbacks = callbacks
    self._overwrite = overwrite

    # Prepare the algorithm executer
    runner: AbstractRunner
    if isinstance(target_function, AbstractRunner):
        runner = target_function
    elif isinstance(target_function, str):
        runner = TargetFunctionScriptRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )
    else:
        runner = TargetFunctionRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )

    # In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
    if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
        if dask_client is not None and n_workers > 1:
            logger.warning(
                "Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
            )
        else:
            available_workers = joblib.cpu_count()
            if n_workers > available_workers:
                # Cap first so that the log message reports the reduced worker count
                # (previously the uncapped value was logged).
                n_workers = available_workers
                logger.info(f"Workers are reduced to {n_workers}.")
                # NOTE(review): the capped ``n_workers`` is not passed on below; presumably the
                # Dask runner derives its worker count from the scenario - confirm.

        # We use a dask runner for parallelization
        runner = DaskParallelRunner(single_worker=runner, dask_client=dask_client)

    # Set the runner to access it globally
    self._runner = runner

    # Adding dependencies of the components
    self._update_dependencies()

    # We have to update our meta data (basically arguments of the components)
    self._scenario._set_meta(self.meta)

    # We have to validate if the object compositions are correct and actually make sense
    self._validate()

    # Finally we configure our optimizer
    self._optimizer = self._get_optimizer()
    assert self._optimizer

    # Register callbacks here
    for callback in callbacks:
        self._optimizer.register_callback(callback)

    # Additionally, we register the runhistory callback from the intensifier to efficiently update our incumbent
    # every time new information are available
    self._optimizer.register_callback(self._intensifier.get_callback(), index=0)

intensifier property #

intensifier: AbstractIntensifier

此 Facade 所使用的强化器(intensifier)。

meta property #

meta: dict[str, Any]

根据 Facade 的所有组件生成一个哈希值。这用于运行名称或确定是否应该继续运行。

optimizer property #

optimizer: SMBO

负责 BO 循环的优化器。跟踪有用的信息,例如状态。

runhistory property #

runhistory: RunHistory

在优化过程中填充所有试验的运行历史。

scenario property #

scenario: Scenario

包含所有环境信息的场景对象。

ask #

ask() -> TrialInfo

向强化器询问下一个试验。

源代码位于 smac/facade/abstract_facade.py
def ask(self) -> TrialInfo:
    """Return the next trial by delegating to the optimizer's ``ask``."""
    next_trial = self._optimizer.ask()
    return next_trial

get_acquisition_function staticmethod #

get_acquisition_function(
    scenario: Scenario, *, xi: float = 0.0
) -> EI

返回一个期望改进获取函数。

参数#

scenario : 场景 xi : float, 默认为 0.0 控制获取函数探索与利用之间的平衡。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_acquisition_function(  # type: ignore
    scenario: Scenario,
    *,
    xi: float = 0.0,
) -> EI:
    """Build the default Expected Improvement acquisition function.

    Parameters
    ----------
    scenario : Scenario
        Not used by this factory.
    xi : float, defaults to 0.0
        Controls the balance between exploration and exploitation of the
        acquisition function.

    Returns
    -------
    EI
        An ``EI`` instance created with ``log=True``.
    """
    expected_improvement = EI(xi=xi, log=True)
    return expected_improvement

get_acquisition_maximizer staticmethod #

get_acquisition_maximizer(
    scenario: Scenario,
    *,
    challengers: int = 10000,
    local_search_iterations: int = 10
) -> LocalAndSortedRandomSearch

返回局部和排序随机搜索作为获取最大化器。

警告#

如果您遇到 RAM 问题,请尝试减少 challenger 的数量。

参数#

challengers : int, 默认为 10000 challenger 的数量。local_search_iterations: int, 默认为 10 局部搜索迭代次数。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_acquisition_maximizer(  # type: ignore
    scenario: Scenario,
    *,
    challengers: int = 10000,
    local_search_iterations: int = 10,
) -> LocalAndSortedRandomSearch:
    """Build a local and sorted random search as acquisition maximizer.

    Warning
    -------
    If you experience RAM issues, try to reduce the number of challengers.

    Parameters
    ----------
    scenario : Scenario
        Supplies the configuration space and the seed.
    challengers : int, defaults to 10000
        Number of challengers.
    local_search_iterations : int, defaults to 10
        Number of local search iterations.
    """
    search_space = scenario.configspace
    return LocalAndSortedRandomSearch(
        search_space,
        challengers=challengers,
        local_search_iterations=local_search_iterations,
        seed=scenario.seed,
    )

get_config_selector staticmethod #

get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16
) -> ConfigSelector

返回默认配置选择器。

源代码位于 smac/facade/abstract_facade.py
@staticmethod
def get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16,
) -> ConfigSelector:
    """Build the default configuration selector.

    ``retrain_after`` and ``retries`` are forwarded unchanged to ``ConfigSelector``.
    """
    selector = ConfigSelector(
        scenario,
        retrain_after=retrain_after,
        retries=retries,
    )
    return selector

get_initial_design staticmethod #

get_initial_design(
    scenario: Scenario,
    *,
    n_configs: int | None = None,
    n_configs_per_hyperparamter: int = 10,
    max_ratio: float = 0.25,
    additional_configs: list[Configuration] | None = None
) -> SobolInitialDesign

返回 Sobol 设计实例。

参数#

scenario : 场景。n_configs : int | None, 默认为 None:初始配置的数量(设置后将禁用参数 n_configs_per_hyperparamter)。n_configs_per_hyperparamter : int, 默认为 10(注意:签名中的参数名实际拼写为 n_configs_per_hyperparamter):每个超参数的初始配置数量。例如,如果配置空间包含五个超参数,并且该参数设置为 10,则将采样 50 个初始配置。max_ratio : float, 默认为 0.25:初始设计中最多使用 scenario.n_trials * max_ratio 数量的配置。附加配置不受此参数影响。additional_configs : list[Configuration] | None, 默认为 None:向初始设计添加附加配置。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_initial_design(  # type: ignore
    scenario: Scenario,
    *,
    n_configs: int | None = None,
    n_configs_per_hyperparamter: int = 10,
    max_ratio: float = 0.25,
    additional_configs: list[Configuration] | None = None,
) -> SobolInitialDesign:
    """Build the default Sobol initial design.

    Parameters
    ----------
    scenario : Scenario
    n_configs : int | None, defaults to None
        Number of initial configurations (disables the argument ``n_configs_per_hyperparamter``).
    n_configs_per_hyperparamter : int, defaults to 10
        Number of initial configurations per hyperparameter. For example, if the configuration space covers
        five hyperparameters and this argument is set to 10, then 50 initial configurations will be sampled.
        Note that the keyword really is spelled ``n_configs_per_hyperparamter`` in this signature; it is
        forwarded to ``SobolInitialDesign`` as ``n_configs_per_hyperparameter``.
    max_ratio : float, defaults to 0.25
        Use at most ``scenario.n_trials`` * ``max_ratio`` number of configurations in the initial design.
        Additional configurations are not affected by this parameter.
    additional_configs : list[Configuration] | None, defaults to None
        Adds additional configurations to the initial design.
    """
    design_kwargs = {
        "scenario": scenario,
        "n_configs": n_configs,
        "n_configs_per_hyperparameter": n_configs_per_hyperparamter,
        "max_ratio": max_ratio,
        "additional_configs": additional_configs,
    }
    return SobolInitialDesign(**design_kwargs)

get_intensifier staticmethod #

get_intensifier(
    scenario: Scenario,
    *,
    max_config_calls: int = 3,
    max_incumbents: int = 10
) -> Intensifier

返回 Intensifier 作为强化器。使用 race_against 的默认配置。

参数#

scenario : 场景 max_config_calls : int, 默认为 3 配置评估的最大数量。基本上,一个配置最多应评估多少个实例-种子键。max_incumbents : int, 默认为 10 在多目标情况下跟踪多少个最佳配置。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_intensifier(  # type: ignore
    scenario: Scenario,
    *,
    max_config_calls: int = 3,
    max_incumbents: int = 10,
) -> Intensifier:
    """Build the default ``Intensifier``. Uses the default configuration for ``race_against``.

    Parameters
    ----------
    scenario : Scenario
    max_config_calls : int, defaults to 3
        Maximum number of configuration evaluations. Basically, how many instance-seed keys should be
        evaluated at most for a configuration.
    max_incumbents : int, defaults to 10
        How many incumbents to keep track of in the case of multi-objective.
    """
    default_intensifier = Intensifier(
        scenario=scenario,
        max_config_calls=max_config_calls,
        max_incumbents=max_incumbents,
    )
    return default_intensifier

get_model staticmethod #

get_model(
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 1.0,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    max_depth: int = 2**20,
    bootstrapping: bool = True
) -> RandomForest

返回随机森林作为代理模型。

参数#

n_trees : int, 默认为 10:随机森林中的树的数量。ratio_features : float, 默认为 1.0:考虑用于分割的特征比例。min_samples_split : int, 默认为 2:执行分割所需的最少数据点数量。min_samples_leaf : int, 默认为 1:叶子中的最少数据点数量。max_depth : int, 默认为 2**20:单棵树的最大深度。bootstrapping : bool, 默认为 True:启用自举。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_model(  # type: ignore
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 1.0,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    max_depth: int = 2**20,
    bootstrapping: bool = True,
) -> RandomForest:
    """Build a random forest as the default surrogate model.

    The forest is always created with ``log_y=True``; configuration space, instance features and
    seed are taken from ``scenario``.

    Parameters
    ----------
    scenario : Scenario
    n_trees : int, defaults to 10
        The number of trees in the random forest.
    ratio_features : float, defaults to 1.0
        The ratio of features that are considered for splitting.
    min_samples_split : int, defaults to 2
        The minimum number of data points to perform a split.
    min_samples_leaf : int, defaults to 1
        The minimum number of data points in a leaf.
    max_depth : int, defaults to 2**20
        The maximum depth of a single tree.
    bootstrapping : bool, defaults to True
        Enables bootstrapping.
    """
    forest_kwargs = {
        "log_y": True,
        "n_trees": n_trees,
        "bootstrapping": bootstrapping,
        "ratio_features": ratio_features,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf,
        "max_depth": max_depth,
        "configspace": scenario.configspace,
        "instance_features": scenario.instance_features,
        "seed": scenario.seed,
    }
    return RandomForest(**forest_kwargs)

get_multi_objective_algorithm staticmethod #

get_multi_objective_algorithm(
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None
) -> MeanAggregationStrategy

返回多目标算法的平均聚合策略。

参数#

scenario : 场景 objective_weights : list[float] | None, 默认为 None 用于加权平均目标的权重。长度必须与目标数量相同。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_multi_objective_algorithm(  # type: ignore
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None,
) -> MeanAggregationStrategy:
    """Build the mean aggregation strategy used as default multi-objective algorithm.

    Parameters
    ----------
    scenario : Scenario
    objective_weights : list[float] | None, defaults to None
        Weights for averaging the objectives in a weighted manner. Must be of the same length as the
        number of objectives.
    """
    strategy = MeanAggregationStrategy(scenario=scenario, objective_weights=objective_weights)
    return strategy

get_random_design staticmethod #

get_random_design(
    scenario: Scenario, *, probability: float = 0.2
) -> ProbabilityRandomDesign

返回 ProbabilityRandomDesign 用于交错配置。

参数#

probability : float, 默认为 0.2 随机抽取配置的概率。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_random_design(  # type: ignore
    scenario: Scenario,
    *,
    probability: float = 0.2,
) -> ProbabilityRandomDesign:
    """Build the default ``ProbabilityRandomDesign`` for interleaving configurations.

    Parameters
    ----------
    scenario : Scenario
        Supplies the seed.
    probability : float, defaults to 0.2
        Probability that a configuration will be drawn at random.
    """
    random_design = ProbabilityRandomDesign(
        probability=probability,
        seed=scenario.seed,
    )
    return random_design

get_runhistory_encoder staticmethod #

get_runhistory_encoder(
    scenario: Scenario,
) -> RunHistoryLogScaledEncoder

返回对数缩放运行历史编码器。这意味着在训练代理模型之前,成本会进行对数缩放。

源代码位于 smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_runhistory_encoder(  # type: ignore
    scenario: Scenario,
) -> RunHistoryLogScaledEncoder:
    """Build a log scaled runhistory encoder.

    That means that costs are log scaled before training the surrogate model.
    """
    encoder = RunHistoryLogScaledEncoder(scenario)
    return encoder

optimize #

optimize(
    *, data_to_scatter: dict[str, Any] | None = None
) -> Configuration | list[Configuration]

优化算法的配置。

参数#

data_to_scatter: dict[str, Any] | None 请注意,此参数仅对 dask_runner 有效!当用户将其本地进程中的数据分散到分布式网络时,这些数据会按核心数量分组进行轮询分散。大致来说,我们可以将这些数据保存在内存中,这样每次想要执行具有大数据集的目标函数时就不必序列化/反序列化数据。例如,当您的目标函数具有所有目标函数共享的大数据集时,此参数非常有用。

返回#

incumbent : Configuration 找到的最佳配置。

源代码位于 smac/facade/abstract_facade.py
def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configuration | list[Configuration]:
    """
    Optimizes the configuration of the algorithm.

    Parameters
    ----------
    data_to_scatter: dict[str, Any] | None
        Note that this argument is only valid together with the dask runner.
        When a user scatters data from their local process to the distributed network,
        this data is distributed in a round-robin fashion grouping by number of cores.
        Roughly speaking, the data is kept in memory so that it does not have to be (de-)serialized
        every time a target function is executed with a big dataset.
        This is useful, for example, when one big dataset is shared across all target function calls.

    Returns
    -------
    incumbent : Configuration | list[Configuration]
        Whatever the optimizer's ``optimize`` returns (the best found configuration(s)).

    Raises
    ------
    ValueError
        If ``data_to_scatter`` is an empty dict.
    """
    # Reject an empty dict before the optimizer is touched at all
    if isinstance(data_to_scatter, dict):
        if len(data_to_scatter) == 0:
            raise ValueError("data_to_scatter must be None or dict with some elements, but got an empty dict.")

    try:
        best_found = self._optimizer.optimize(data_to_scatter=data_to_scatter)
    finally:
        # The optimizer state is saved even when the optimization raises
        self._optimizer.save()

    return best_found

tell #

tell(
    info: TrialInfo, value: TrialValue, save: bool = True
) -> None

将试验结果添加到运行历史并更新强化器。

参数#

info: TrialInfo 描述要处理结果的试验。value: TrialValue 包含关于试验执行的相关信息。save : bool, 可选,默认为 True 是否应保存运行历史。

源代码位于 smac/facade/abstract_facade.py
def tell(self, info: TrialInfo, value: TrialValue, save: bool = True) -> None:
    """Adds the result of a trial to the runhistory and updates the intensifier.

    The call is delegated to the optimizer's ``tell``.

    Parameters
    ----------
    info : TrialInfo
        Describes the trial from which to process the results.
    value : TrialValue
        Contains relevant information regarding the execution of a trial.
    save : bool, defaults to True
        Whether the runhistory should be saved.
    """
    outcome = self._optimizer.tell(info, value, save=save)
    return outcome

validate #

validate(
    config: Configuration, *, seed: int | None = None
) -> float | list[float]

在与优化过程中使用的种子不同的种子上以及最高预算下(如果预算类型为实数值)验证配置。

参数#

config : Configuration:要验证的配置。seed : int | None, 默认为 None:如果为 None,则使用场景中的种子。(注意:原文档还列出了参数 instances : list[str] | None, 默认为 None——要验证的实例;如果为 None,则使用场景中指定的所有实例;如果预算类型为实数值,则忽略此参数——但当前 validate 的签名并不接受该参数。)

返回#

cost : float | list[float] 配置的平均成本。在多精度情况下,平均每个目标的成本。

源代码位于 smac/facade/abstract_facade.py
def validate(
    self,
    config: Configuration,
    *,
    seed: int | None = None,
) -> float | list[float]:
    """Validates a configuration on seeds different from the ones used in the optimization process and on the
    highest budget (if budget type is real-valued).

    The call is delegated to the optimizer's ``validate``.

    Parameters
    ----------
    config : Configuration
        Configuration to validate.
    seed : int | None, defaults to None
        If None, the seed from the scenario is used.

    Returns
    -------
    cost : float | list[float]
        The averaged cost of the configuration. In case of multi-fidelity, the cost of each objective is
        averaged.
    """
    averaged_cost = self._optimizer.validate(config, seed=seed)
    return averaged_cost