
Multi-Fidelity Facade

smac.facade.multi_fidelity_facade #

MultiFidelityFacade #

MultiFidelityFacade(
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None
)

Bases: HyperparameterOptimizationFacade

This facade configures SMAC in a multi-fidelity setting.

Warning

smac.main.config_selector.ConfigSelector contains the min_trials argument, which determines how many samples are required to train the surrogate model. If budgets are involved, the highest budget is checked first. For example, if min_trials is three but the runhistory contains only two trials on the highest budget, trials on lower budgets are used instead.
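For orientation, a minimal usage sketch (the toy target function, its cost, and the budget range are illustrative assumptions, not part of this API): the intensifier chooses a budget and passes it to the target function alongside the configuration and seed.

from ConfigSpace import ConfigurationSpace
from smac import MultiFidelityFacade, Scenario

def train(config, seed: int = 0, budget: float = 25) -> float:
    # Toy objective: larger budgets (e.g. epochs) reduce the cost.
    x = config["x"]
    return (x - 2) ** 2 + 1.0 / budget

configspace = ConfigurationSpace({"x": (-5.0, 5.0)})  # one uniform float hyperparameter
scenario = Scenario(configspace, n_trials=100, min_budget=1, max_budget=25)
smac = MultiFidelityFacade(scenario, train)
incumbent = smac.optimize()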

Source code in smac/facade/abstract_facade.py
def __init__(
    self,
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None,
):
    setup_logging(logging_level)

    if callbacks is None:
        callbacks = []

    if model is None:
        model = self.get_model(scenario)

    if acquisition_function is None:
        acquisition_function = self.get_acquisition_function(scenario)

    if acquisition_maximizer is None:
        acquisition_maximizer = self.get_acquisition_maximizer(scenario)

    if initial_design is None:
        initial_design = self.get_initial_design(scenario)

    if random_design is None:
        random_design = self.get_random_design(scenario)

    if intensifier is None:
        intensifier = self.get_intensifier(scenario)

    if multi_objective_algorithm is None and scenario.count_objectives() > 1:
        multi_objective_algorithm = self.get_multi_objective_algorithm(scenario=scenario)

    if runhistory_encoder is None:
        runhistory_encoder = self.get_runhistory_encoder(scenario)

    if config_selector is None:
        config_selector = self.get_config_selector(scenario)

    # Initialize empty stats and runhistory object
    runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm)

    # Set the seed for configuration space
    scenario.configspace.seed(scenario.seed)

    # Set variables globally
    self._scenario = scenario
    self._model = model
    self._acquisition_function = acquisition_function
    self._acquisition_maximizer = acquisition_maximizer
    self._initial_design = initial_design
    self._random_design = random_design
    self._intensifier = intensifier
    self._multi_objective_algorithm = multi_objective_algorithm
    self._runhistory = runhistory
    self._runhistory_encoder = runhistory_encoder
    self._config_selector = config_selector
    self._callbacks = callbacks
    self._overwrite = overwrite

    # Prepare the algorithm executer
    runner: AbstractRunner
    if isinstance(target_function, AbstractRunner):
        runner = target_function
    elif isinstance(target_function, str):
        runner = TargetFunctionScriptRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )
    else:
        runner = TargetFunctionRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )

    # In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
    if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
        if dask_client is not None and n_workers > 1:
            logger.warning(
                "Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
            )
        else:
            available_workers = joblib.cpu_count()
            if n_workers > available_workers:
                logger.info(f"Workers are reduced to {n_workers}.")
                n_workers = available_workers

        # We use a dask runner for parallelization
        runner = DaskParallelRunner(single_worker=runner, dask_client=dask_client)

    # Set the runner to access it globally
    self._runner = runner

    # Adding dependencies of the components
    self._update_dependencies()

    # We have to update our meta data (basically arguments of the components)
    self._scenario._set_meta(self.meta)

    # We have to validate if the object compositions are correct and actually make sense
    self._validate()

    # Finally we configure our optimizer
    self._optimizer = self._get_optimizer()
    assert self._optimizer

    # Register callbacks here
    for callback in callbacks:
        self._optimizer.register_callback(callback)

    # Additionally, we register the runhistory callback from the intensifier to efficiently update our incumbent
    # every time new information is available
    self._optimizer.register_callback(self._intensifier.get_callback(), index=0)
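As the end of the constructor's runner setup shows, SMAC wraps the runner in a DaskParallelRunner whenever scenario.n_workers > 1 or a dask_client is passed. A sketch of both options, assuming dask.distributed is installed and reusing the names from the example above:

from dask.distributed import Client

# Option 1: let SMAC spawn a local Dask cluster with four workers.
scenario = Scenario(configspace, n_trials=100, min_budget=1, max_budget=25, n_workers=4)
smac = MultiFidelityFacade(scenario, train)

# Option 2: pass an existing client; scenario.n_workers is then ignored
# (a warning is logged if both are set).
smac = MultiFidelityFacade(scenario, train, dask_client=Client(n_workers=4))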

intensifier property #

intensifier: AbstractIntensifier

The intensifier which decides which trials (configuration, seed, budget, and instance) to run next and keeps track of the incumbent(s).

meta property #

meta: dict[str, Any]

Generates a hash based on all components of the facade. This is used for the run name and to determine whether a run should be continued.

optimizer property #

optimizer: SMBO

The optimizer which is responsible for the Bayesian Optimization (BO) loop. Keeps track of useful information such as the status.

runhistory property #

runhistory: RunHistory

The runhistory, which is filled with all trials during the optimization process.

scenario property #

scenario: Scenario

The scenario object, which holds all environment information.

ask #

ask() -> TrialInfo

Asks the intensifier for the next trial.

Source code in smac/facade/abstract_facade.py
def ask(self) -> TrialInfo:
    """Asks the intensifier for the next trial."""
    return self._optimizer.ask()
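ask pairs with tell (documented below) for a manual optimization loop. A sketch, reusing the setup from the first example; the TrialInfo returned by ask carries the configuration, seed, and budget selected by the intensifier:

from smac.runhistory.dataclasses import TrialValue

smac = MultiFidelityFacade(scenario, train, overwrite=True)
for _ in range(10):
    info = smac.ask()  # next trial to evaluate
    cost = train(info.config, seed=info.seed, budget=info.budget)
    smac.tell(info, TrialValue(cost=cost))  # report the result back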

get_acquisition_function staticmethod #

get_acquisition_function(
    scenario: Scenario, *, xi: float = 0.0
) -> EI

Returns an Expected Improvement acquisition function.

Parameters #

scenario : Scenario
xi : float, defaults to 0.0
    Controls the balance between exploration and exploitation of the acquisition function.

Source code in smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_acquisition_function(  # type: ignore
    scenario: Scenario,
    *,
    xi: float = 0.0,
) -> EI:
    """Returns an Expected Improvement acquisition function.

    Parameters
    ----------
    scenario : Scenario
    xi : float, defaults to 0.0
        Controls the balance between exploration and exploitation of the
        acquisition function.
    """
    return EI(xi=xi, log=True)
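The get_* methods below are the hooks for swapping out default components: build the component, then pass it to the constructor. A sketch with a slightly more explorative EI (the xi value is an illustrative assumption, not a recommendation):

acquisition_function = MultiFidelityFacade.get_acquisition_function(scenario, xi=0.01)
smac = MultiFidelityFacade(scenario, train, acquisition_function=acquisition_function)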

get_acquisition_maximizer staticmethod #

get_acquisition_maximizer(
    scenario: Scenario,
    *,
    challengers: int = 10000,
    local_search_iterations: int = 10
) -> LocalAndSortedRandomSearch

Returns local and sorted random search as acquisition maximizer.

Warning #

If you experience RAM issues, try to reduce the number of challengers.

Parameters #

challengers : int, defaults to 10000
    Number of challengers.
local_search_iterations : int, defaults to 10
    Number of local search iterations.

Source code in smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_acquisition_maximizer(  # type: ignore
    scenario: Scenario,
    *,
    challengers: int = 10000,
    local_search_iterations: int = 10,
) -> LocalAndSortedRandomSearch:
    """Returns local and sorted random search as acquisition maximizer.

    Warning
    -------
    If you experience RAM issues, try to reduce the number of challengers.

    Parameters
    ----------
    challengers : int, defaults to 10000
        Number of challengers.
    local_search_iterations: int, defaults to 10
        Number of local search iterations.
    """
    optimizer = LocalAndSortedRandomSearch(
        scenario.configspace,
        challengers=challengers,
        local_search_iterations=local_search_iterations,
        seed=scenario.seed,
    )

    return optimizer

get_config_selector staticmethod #

get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16
) -> ConfigSelector

Returns the default configuration selector.

Source code in smac/facade/abstract_facade.py
@staticmethod
def get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16,
) -> ConfigSelector:
    """Returns the default configuration selector."""
    return ConfigSelector(scenario, retrain_after=retrain_after, retries=retries)

get_initial_design staticmethod #

get_initial_design(
    scenario: Scenario,
    *,
    n_configs: int | None = None,
    n_configs_per_hyperparamter: int = 10,
    max_ratio: float = 0.25,
    additional_configs: list[Configuration] = None
) -> RandomInitialDesign

Returns a random initial design.

Parameters #

scenario : Scenario
n_configs : int | None, defaults to None
    Number of initial configurations (disables the argument n_configs_per_hyperparameter).
n_configs_per_hyperparameter : int, defaults to 10
    Number of initial configurations per hyperparameter. For example, if the configuration space covers five hyperparameters and n_configs_per_hyperparameter is set to 10, then 50 initial configurations will be sampled.
max_ratio : float, defaults to 0.25
    Use at most scenario.n_trials * max_ratio configurations in the initial design. Additional configurations are not affected by this parameter.
additional_configs : list[Configuration], defaults to []
    Adds additional configurations to the initial design.

Source code in smac/facade/multi_fidelity_facade.py
@staticmethod
def get_initial_design(  # type: ignore
    scenario: Scenario,
    *,
    n_configs: int | None = None,
    n_configs_per_hyperparamter: int = 10,
    max_ratio: float = 0.25,
    additional_configs: list[Configuration] = None,
) -> RandomInitialDesign:
    """Returns a random initial design.

    Parameters
    ----------
    scenario : Scenario
    n_configs : int | None, defaults to None
        Number of initial configurations (disables the argument ``n_configs_per_hyperparameter``).
    n_configs_per_hyperparameter: int, defaults to 10
        Number of initial configurations per hyperparameter. For example, if my configuration space covers five
        hyperparameters and ``n_configs_per_hyperparameter`` is set to 10, then 50 initial configurations will be
        sampled.
    max_ratio: float, defaults to 0.25
        Use at most ``scenario.n_trials`` * ``max_ratio`` number of configurations in the initial design.
        Additional configurations are not affected by this parameter.
    additional_configs: list[Configuration], defaults to []
        Adds additional configurations to the initial design.
    """
    if additional_configs is None:
        additional_configs = []
    return RandomInitialDesign(
        scenario=scenario,
        n_configs=n_configs,
        n_configs_per_hyperparameter=n_configs_per_hyperparamter,
        max_ratio=max_ratio,
        additional_configs=additional_configs,
    )
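Note how max_ratio caps the design: with five hyperparameters and the defaults, 5 * 10 = 50 configurations would be requested, but a scenario with n_trials=100 allows at most 100 * 0.25 = 25. A sketch of overriding the design (note that the keyword is spelled n_configs_per_hyperparamter in the signature above):

initial_design = MultiFidelityFacade.get_initial_design(
    scenario,
    n_configs_per_hyperparamter=10,  # keyword spelled as in the signature
    max_ratio=0.25,                  # cap: at most scenario.n_trials * 0.25 configs
)
smac = MultiFidelityFacade(scenario, train, initial_design=initial_design)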

get_intensifier staticmethod #

get_intensifier(
    scenario: Scenario,
    *,
    eta: int = 3,
    n_seeds: int = 1,
    instance_seed_order: str | None = "shuffle_once",
    max_incumbents: int = 10,
    incumbent_selection: str = "highest_observed_budget"
) -> Hyperband

Returns a Hyperband intensifier instance. Budgets are supported.

Parameters #

eta : int, defaults to 3
    Input that controls the proportion of configurations discarded in each round of Successive Halving.
n_seeds : int, defaults to 1
    How many seeds to use for each instance.
instance_seed_order : str, defaults to "shuffle_once"
    How to order the instance-seed pairs. Can be set to:
    * None: No shuffling at all; use the instance-seed order provided by the user.
    * "shuffle_once": Shuffle the instance-seed keys once and use the same order across all runs.
    * "shuffle": Shuffle the instance-seed keys for each bracket individually.
incumbent_selection : str, defaults to "highest_observed_budget"
    How to select the incumbent when using budgets. Can be set to:
    * "any_budget": The incumbent is the best on any budget, i.e., the best performance regardless of budget.
    * "highest_observed_budget": The incumbent is the best on the highest budget run so far. Refer to runhistory.get_trials for more details. Crucially, for a given config-instance-seed, only the highest budget executed so far is used for comparison against the incumbent. Note that if the highest observed budget is smaller than the highest budget of the incumbent, the configuration is queued to be intensified again.
    * "highest_budget": The incumbent is selected based only on the absolute highest budget available.
max_incumbents : int, defaults to 10
    How many incumbents to keep track of in the case of multi-objective.

Source code in smac/facade/multi_fidelity_facade.py
@staticmethod
def get_intensifier(  # type: ignore
    scenario: Scenario,
    *,
    eta: int = 3,
    n_seeds: int = 1,
    instance_seed_order: str | None = "shuffle_once",
    max_incumbents: int = 10,
    incumbent_selection: str = "highest_observed_budget",
) -> Hyperband:
    """Returns a Hyperband intensifier instance. Budgets are supported.

    eta : int, defaults to 3
        Input that controls the proportion of configurations discarded in each round of Successive Halving.
    n_seeds : int, defaults to 1
        How many seeds to use for each instance.
    instance_seed_order : str, defaults to "shuffle_once"
        How to order the instance-seed pairs. Can be set to:
        * None: No shuffling at all and use the instance-seed order provided by the user.
        * "shuffle_once": Shuffle the instance-seed keys once and use the same order across all runs.
        * "shuffle": Shuffles the instance-seed keys for each bracket individually.
    incumbent_selection : str, defaults to "any_budget"
        How to select the incumbent when using budgets. Can be set to:
        * "any_budget": Incumbent is the best on any budget, i.e., the best performance regardless of budget.
        * "highest_observed_budget": Incumbent is the best in the highest budget run so far.
        refer to `runhistory.get_trials` for more details. Crucially, if true, then a
        for a given config-instance-seed, only the highest (so far executed) budget is used for
        comparison against the incumbent. Notice, that if the highest observed budget is smaller
        than the highest budget of the incumbent, the configuration will be queued again to
        be intensified again.
        * "highest_budget": Incumbent is selected only based on the absolute highest budget
        available only.
    max_incumbents : int, defaults to 10
        How many incumbents to keep track of in the case of multi-objective.
    """
    return Hyperband(
        scenario=scenario,
        eta=eta,
        n_seeds=n_seeds,
        instance_seed_order=instance_seed_order,
        max_incumbents=max_incumbents,
        incumbent_selection=incumbent_selection,
    )
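With eta=3 and, say, min_budget=1 and max_budget=25, Successive Halving roughly triples the budget from stage to stage while keeping about a third of the configurations. A sketch of a customized intensifier (the argument values are illustrative):

intensifier = MultiFidelityFacade.get_intensifier(
    scenario,
    eta=4,                             # discard roughly 3/4 of configurations per round
    incumbent_selection="any_budget",  # best performance regardless of budget
)
smac = MultiFidelityFacade(scenario, train, intensifier=intensifier)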

get_model staticmethod #

get_model(
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 1.0,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    max_depth: int = 2**20,
    bootstrapping: bool = True
) -> RandomForest

Returns a random forest as surrogate model.

Parameters #

n_trees : int, defaults to 10
    The number of trees in the random forest.
ratio_features : float, defaults to 1.0
    The ratio of features that are considered for splitting.
min_samples_split : int, defaults to 2
    The minimum number of data points required to perform a split.
min_samples_leaf : int, defaults to 1
    The minimum number of data points in a leaf.
max_depth : int, defaults to 2**20
    The maximum depth of a single tree.
bootstrapping : bool, defaults to True
    Enables bootstrapping.

Source code in smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_model(  # type: ignore
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 1.0,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    max_depth: int = 2**20,
    bootstrapping: bool = True,
) -> RandomForest:
    """Returns a random forest as surrogate model.

    Parameters
    ----------
    n_trees : int, defaults to 10
        The number of trees in the random forest.
    ratio_features : float, defaults to 1.0
        The ratio of features that are considered for splitting.
    min_samples_split : int, defaults to 2
        The minimum number of data points to perform a split.
    min_samples_leaf : int, defaults to 1
        The minimum number of data points in a leaf.
    max_depth : int, defaults to 2**20
        The maximum depth of a single tree.
    bootstrapping : bool, defaults to True
        Enables bootstrapping.
    """
    return RandomForest(
        log_y=True,
        n_trees=n_trees,
        bootstrapping=bootstrapping,
        ratio_features=ratio_features,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_depth=max_depth,
        configspace=scenario.configspace,
        instance_features=scenario.instance_features,
        seed=scenario.seed,
    )

get_multi_objective_algorithm staticmethod #

get_multi_objective_algorithm(
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None
) -> MeanAggregationStrategy

Returns the mean aggregation strategy for the multi-objective algorithm.

Parameters #

scenario : Scenario
objective_weights : list[float] | None, defaults to None
    Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of objectives.

Source code in smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_multi_objective_algorithm(  # type: ignore
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None,
) -> MeanAggregationStrategy:
    """Returns the mean aggregation strategy for the multi-objective algorithm.

    Parameters
    ----------
    scenario : Scenario
    objective_weights : list[float] | None, defaults to None
        Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of
        objectives.
    """
    return MeanAggregationStrategy(
        scenario=scenario,
        objective_weights=objective_weights,
    )
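In the multi-objective case the scenario declares several objectives and the target function returns one cost per objective. A sketch with two hypothetical objectives, "loss" and "time", weighted 2:1 (names reuse the first example):

def train_mo(config, seed: int = 0, budget: float = 25) -> dict[str, float]:
    # Hypothetical target: one cost per declared objective.
    x = config["x"]
    return {"loss": (x - 2) ** 2, "time": budget}

scenario_mo = Scenario(configspace, objectives=["loss", "time"], n_trials=100, min_budget=1, max_budget=25)
multi_objective_algorithm = MultiFidelityFacade.get_multi_objective_algorithm(
    scenario_mo, objective_weights=[2.0, 1.0]
)
smac = MultiFidelityFacade(scenario_mo, train_mo, multi_objective_algorithm=multi_objective_algorithm)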

get_random_design staticmethod #

get_random_design(
    scenario: Scenario, *, probability: float = 0.2
) -> ProbabilityRandomDesign

Returns ProbabilityRandomDesign for interleaving configurations.

Parameters #

probability : float, defaults to 0.2
    Probability that a configuration will be drawn at random.

Source code in smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_random_design(  # type: ignore
    scenario: Scenario,
    *,
    probability: float = 0.2,
) -> ProbabilityRandomDesign:
    """Returns ``ProbabilityRandomDesign`` for interleaving configurations.

    Parameters
    ----------
    probability : float, defaults to 0.2
        Probability that a configuration will be drawn at random.
    """
    return ProbabilityRandomDesign(probability=probability, seed=scenario.seed)

get_runhistory_encoder staticmethod #

get_runhistory_encoder(
    scenario: Scenario,
) -> RunHistoryLogScaledEncoder

Returns a log-scaled runhistory encoder, i.e., costs are log-scaled before the surrogate model is trained.

Source code in smac/facade/hyperparameter_optimization_facade.py
@staticmethod
def get_runhistory_encoder(  # type: ignore
    scenario: Scenario,
) -> RunHistoryLogScaledEncoder:
    """Returns a log scaled runhistory encoder. That means that costs are log scaled before
    training the surrogate model.
    """
    return RunHistoryLogScaledEncoder(scenario)

optimize #

optimize(
    *, data_to_scatter: dict[str, Any] | None = None
) -> Configuration | list[Configuration]

Optimizes the configuration of the algorithm.

Parameters #

data_to_scatter : dict[str, Any] | None
    Note that this argument is only valid when using a Dask runner. When a user scatters data from their local process to the distributed network, the data is distributed in a round-robin fashion, grouped by the number of cores. Roughly speaking, the data can be kept in memory so it does not have to be (de-)serialized every time a target function is executed with a big dataset. This is useful, for example, when a big dataset is shared across all target function runs.

Returns #

incumbent : Configuration
    Best found configuration.

Source code in smac/facade/abstract_facade.py
def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configuration | list[Configuration]:
    """
    Optimizes the configuration of the algorithm.

    Parameters
    ----------
    data_to_scatter: dict[str, Any] | None
        We first note that this argument is only valid when using a Dask runner!
        When a user scatters data from their local process to the distributed network,
        this data is distributed in a round-robin fashion, grouped by the number of cores.
        Roughly speaking, we can keep this data in memory and then we do not have to (de-)serialize the data
        every time we would like to execute a target function with a big dataset.
        For example, when your target functions share a big dataset,
        this argument is very useful.

    Returns
    -------
    incumbent : Configuration
        Best found configuration.
    """
    incumbents = None
    if isinstance(data_to_scatter, dict) and len(data_to_scatter) == 0:
        raise ValueError("data_to_scatter must be None or dict with some elements, but got an empty dict.")

    try:
        incumbents = self._optimizer.optimize(data_to_scatter=data_to_scatter)
    finally:
        self._optimizer.save()

    return incumbents
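A sketch of scattering a large object once (only meaningful with a Dask-backed run; the loader below and the assumption that scattered entries reach the target function as keyword arguments are illustrative, not confirmed by this reference):

big_data = load_dataset()  # hypothetical loader for an expensive-to-serialize object
# Assumed here: the key matches a keyword argument of the target function.
incumbent = smac.optimize(data_to_scatter={"data": big_data})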

tell #

tell(
    info: TrialInfo, value: TrialValue, save: bool = True
) -> None

Adds the result of a trial to the runhistory and updates the intensifier.

Parameters #

info : TrialInfo
    Describes the trial from which to process the results.
value : TrialValue
    Contains relevant information regarding the execution of a trial.
save : bool, defaults to True
    Whether the runhistory should be saved.

Source code in smac/facade/abstract_facade.py
def tell(self, info: TrialInfo, value: TrialValue, save: bool = True) -> None:
    """Adds the result of a trial to the runhistory and updates the intensifier.

    Parameters
    ----------
    info: TrialInfo
        Describes the trial from which to process the results.
    value: TrialValue
        Contains relevant information regarding the execution of a trial.
    save : bool, defaults to True
        Whether the runhistory should be saved.
    """
    return self._optimizer.tell(info, value, save=save)

validate #

validate(
    config: Configuration, *, seed: int | None = None
) -> float | list[float]

Validates a configuration on seeds different from the ones used in the optimization process, and on the highest budget (if the budget type is real-valued).

Parameters #

config : Configuration
    Configuration to validate.
seed : int | None, defaults to None
    If None, the seed from the scenario is used.

Returns #

cost : float | list[float]
    The averaged cost of the configuration. In the multi-objective case, the cost of each objective is averaged.

Source code in smac/facade/abstract_facade.py
def validate(
    self,
    config: Configuration,
    *,
    seed: int | None = None,
) -> float | list[float]:
    """Validates a configuration on seeds different from the ones used in the optimization process and on the
    highest budget (if budget type is real-valued).

    Parameters
    ----------
    config : Configuration
        Configuration to validate
    seed : int | None, defaults to None
        If None, the seed from the scenario is used.

    Returns
    -------
    cost : float | list[float]
        The averaged cost of the configuration. In the multi-objective case, the cost of each objective is
        averaged.
    """
    return self._optimizer.validate(config, seed=seed)
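Tying it together, a short sketch: after optimization, the incumbent can be re-evaluated on unseen seeds at the highest budget.

incumbent = smac.optimize()
validation_cost = smac.validate(incumbent)  # averaged cost on unseen seeds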