Black Box Facade

smac.facade.blackbox_facade #

BlackBoxFacade #

BlackBoxFacade(
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None
)

Bases: AbstractFacade

Source code in smac/facade/abstract_facade.py
def __init__(
    self,
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None,
):
    setup_logging(logging_level)

    if callbacks is None:
        callbacks = []

    if model is None:
        model = self.get_model(scenario)

    if acquisition_function is None:
        acquisition_function = self.get_acquisition_function(scenario)

    if acquisition_maximizer is None:
        acquisition_maximizer = self.get_acquisition_maximizer(scenario)

    if initial_design is None:
        initial_design = self.get_initial_design(scenario)

    if random_design is None:
        random_design = self.get_random_design(scenario)

    if intensifier is None:
        intensifier = self.get_intensifier(scenario)

    if multi_objective_algorithm is None and scenario.count_objectives() > 1:
        multi_objective_algorithm = self.get_multi_objective_algorithm(scenario=scenario)

    if runhistory_encoder is None:
        runhistory_encoder = self.get_runhistory_encoder(scenario)

    if config_selector is None:
        config_selector = self.get_config_selector(scenario)

    # Initialize empty stats and runhistory object
    runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm)

    # Set the seed for configuration space
    scenario.configspace.seed(scenario.seed)

    # Set variables globally
    self._scenario = scenario
    self._model = model
    self._acquisition_function = acquisition_function
    self._acquisition_maximizer = acquisition_maximizer
    self._initial_design = initial_design
    self._random_design = random_design
    self._intensifier = intensifier
    self._multi_objective_algorithm = multi_objective_algorithm
    self._runhistory = runhistory
    self._runhistory_encoder = runhistory_encoder
    self._config_selector = config_selector
    self._callbacks = callbacks
    self._overwrite = overwrite

    # Prepare the algorithm executer
    runner: AbstractRunner
    if isinstance(target_function, AbstractRunner):
        runner = target_function
    elif isinstance(target_function, str):
        runner = TargetFunctionScriptRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )
    else:
        runner = TargetFunctionRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )

    # In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
    if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
        if dask_client is not None and n_workers > 1:
            logger.warning(
                "Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
            )
        else:
            available_workers = joblib.cpu_count()
            if n_workers > available_workers:
                logger.info(f"Workers are reduced to {n_workers}.")
                n_workers = available_workers

        # We use a dask runner for parallelization
        runner = DaskParallelRunner(single_worker=runner, dask_client=dask_client)

    # Set the runner to access it globally
    self._runner = runner

    # Adding dependencies of the components
    self._update_dependencies()

    # We have to update our meta data (basically arguments of the components)
    self._scenario._set_meta(self.meta)

    # We have to validate if the object compositions are correct and actually make sense
    self._validate()

    # Finally we configure our optimizer
    self._optimizer = self._get_optimizer()
    assert self._optimizer

    # Register callbacks here
    for callback in callbacks:
        self._optimizer.register_callback(callback)

    # Additionally, we register the runhistory callback from the intensifier to efficiently update our incumbent
    # every time new information are available
    self._optimizer.register_callback(self._intensifier.get_callback(), index=0)
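
A minimal usage sketch (not part of the SMAC sources): the facade only needs a Scenario and a target function; every other component falls back to the defaults returned by the get_* methods documented below. The quadratic target function and the hyperparameter x are illustrative assumptions. Later sketches on this page reuse configspace, scenario, quadratic, and smac from this snippet.

from ConfigSpace import Configuration, ConfigurationSpace

from smac import BlackBoxFacade, Scenario

def quadratic(config: Configuration, seed: int = 0) -> float:
    """Toy target function: minimize x^2."""
    return config["x"] ** 2

# One continuous hyperparameter x in [-5, 5]
configspace = ConfigurationSpace({"x": (-5.0, 5.0)})
scenario = Scenario(configspace, deterministic=True, n_trials=50)

smac = BlackBoxFacade(scenario, quadratic, overwrite=True)
incumbent = smac.optimize()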

intensifier property #

intensifier: AbstractIntensifier

The intensifier which is responsible for the BO loop. Keeps track of useful information like status.

meta property #

meta: dict[str, Any]

Generates a hash based on all components of the facade. This is used for the run name or to determine whether a run should be continued or not.

optimizer property #

optimizer: SMBO

The optimizer which is responsible for the BO loop. Keeps track of useful information like status.

runhistory property #

runhistory: RunHistory

The runhistory which is filled with all trials during the optimization process.

scenario property #

scenario: Scenario

The scenario object which holds all environment information.

ask #

ask() -> TrialInfo

Asks the intensifier for the next trial.

Source code in smac/facade/abstract_facade.py
def ask(self) -> TrialInfo:
    """Asks the intensifier for the next trial."""
    return self._optimizer.ask()

get_acquisition_function staticmethod #

get_acquisition_function(
    scenario: Scenario, *, xi: float = 0.0
) -> EI

Returns an Expected Improvement acquisition function.

Parameters #

scenario : Scenario
xi : float, defaults to 0.0
    Controls the balance between exploration and exploitation of the acquisition function.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_acquisition_function(  # type: ignore
    scenario: Scenario,
    *,
    xi: float = 0.0,
) -> EI:
    """Returns an Expected Improvement acquisition function.

    Parameters
    ----------
    scenario : Scenario
    xi : float, defaults to 0.0
        Controls the balance between exploration and exploitation of the
        acquisition function.
    """
    return EI(xi=xi)
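
A larger xi shifts EI towards exploration. A small sketch, reusing scenario and quadratic from the construction sketch above:

acquisition_function = BlackBoxFacade.get_acquisition_function(scenario, xi=0.01)
smac = BlackBoxFacade(
    scenario,
    quadratic,
    acquisition_function=acquisition_function,
    overwrite=True,
)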

get_acquisition_maximizer staticmethod #

get_acquisition_maximizer(
    scenario: Scenario,
    *,
    challengers: int = 1000,
    local_search_iterations: int = 10
) -> LocalAndSortedRandomSearch

Returns local and sorted random search as the acquisition maximizer.

Parameters #

challengers : int, defaults to 1000
    Number of challengers.
local_search_iterations : int, defaults to 10
    Number of local search iterations.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_acquisition_maximizer(  # type: ignore
    scenario: Scenario,
    *,
    challengers: int = 1000,
    local_search_iterations: int = 10,
) -> LocalAndSortedRandomSearch:
    """Returns local and sorted random search as acquisition maximizer.

    Parameters
    ----------
    challengers : int, defaults to 1000
        Number of challengers.
    local_search_iterations: int, defaults to 10
        Number of local search iterations.
    """
    return LocalAndSortedRandomSearch(
        configspace=scenario.configspace,
        challengers=challengers,
        local_search_iterations=local_search_iterations,
        seed=scenario.seed,
    )
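
The maximizer can be tuned in the same way, e.g. trading random challengers against local-search iterations (sketch with the names from the construction sketch; the concrete numbers are arbitrary):

acquisition_maximizer = BlackBoxFacade.get_acquisition_maximizer(
    scenario,
    challengers=500,
    local_search_iterations=20,
)
smac = BlackBoxFacade(
    scenario,
    quadratic,
    acquisition_maximizer=acquisition_maximizer,
    overwrite=True,
)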

get_config_selector staticmethod #

get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 1,
    retries: int = 16
) -> ConfigSelector

Returns the default configuration selector.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 1,
    retries: int = 16,
) -> ConfigSelector:
    """Returns the default configuration selector."""
    return super(BlackBoxFacade, BlackBoxFacade).get_config_selector(
        scenario, retrain_after=retrain_after, retries=retries
    )

get_initial_design staticmethod #

get_initial_design(
    scenario: Scenario,
    *,
    n_configs: int | None = None,
    n_configs_per_hyperparamter: int = 8,
    max_ratio: float = 0.25,
    additional_configs: list[Configuration] = None
) -> SobolInitialDesign

Returns a Sobol design instance.

Parameters #

scenario : Scenario
n_configs : int | None, defaults to None
    Number of initial configurations (disables the argument n_configs_per_hyperparameter).
n_configs_per_hyperparameter : int, defaults to 8
    Number of initial configurations per hyperparameter. For example, if the configuration space covers five hyperparameters and n_configs_per_hyperparameter is set to 10, then 50 initial configurations will be sampled.
max_ratio : float, defaults to 0.25
    Use at most scenario.n_trials * max_ratio configurations in the initial design. Additional configurations are not affected by this parameter.
additional_configs : list[Configuration], defaults to []
    Adds additional configurations to the initial design.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_initial_design(  # type: ignore
    scenario: Scenario,
    *,
    n_configs: int | None = None,
    n_configs_per_hyperparamter: int = 8,
    max_ratio: float = 0.25,
    additional_configs: list[Configuration] = None,
) -> SobolInitialDesign:
    """Returns a Sobol design instance.

    Parameters
    ----------
    scenario : Scenario
    n_configs : int | None, defaults to None
        Number of initial configurations (disables the arguments ``n_configs_per_hyperparameter``).
    n_configs_per_hyperparameter: int, defaults to 8
        Number of initial configurations per hyperparameter. For example, if my configuration space covers five
        hyperparameters and ``n_configs_per_hyperparameter`` is set to 10, then 50 initial configurations will be
        samples.
    max_ratio: float, defaults to 0.25
        Use at most ``scenario.n_trials`` * ``max_ratio`` number of configurations in the initial design.
        Additional configurations are not affected by this parameter.
    additional_configs: list[Configuration], defaults to []
        Adds additional configurations to the initial design.
    """
    if additional_configs is None:
        additional_configs = []
    return SobolInitialDesign(
        scenario=scenario,
        n_configs=n_configs,
        n_configs_per_hyperparameter=n_configs_per_hyperparamter,
        max_ratio=max_ratio,
        additional_configs=additional_configs,
        seed=scenario.seed,
    )
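
Sketch: ten Sobol configurations plus the configuration space's default configuration as the initial design (names from the construction sketch; max_ratio still caps the number of sampled configurations relative to scenario.n_trials):

initial_design = BlackBoxFacade.get_initial_design(
    scenario,
    n_configs=10,
    additional_configs=[scenario.configspace.get_default_configuration()],
)
smac = BlackBoxFacade(scenario, quadratic, initial_design=initial_design, overwrite=True)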

get_intensifier staticmethod #

get_intensifier(
    scenario: Scenario,
    *,
    max_config_calls: int = 3,
    max_incumbents: int = 20
) -> Intensifier

Returns Intensifier as the intensifier. Uses the default configuration for race_against.

Parameters #

scenario : Scenario
max_config_calls : int, defaults to 3
    Maximum number of configuration evaluations, i.e., how many instance-seed keys should at most be evaluated for one configuration.
max_incumbents : int, defaults to 20
    How many incumbents to keep track of in the multi-objective case.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_intensifier(  # type: ignore
    scenario: Scenario,
    *,
    max_config_calls: int = 3,
    max_incumbents: int = 20,
) -> Intensifier:
    """Returns ``Intensifier`` as intensifier. Uses the default configuration for ``race_against``.

    Parameters
    ----------
    scenario : Scenario
    max_config_calls : int, defaults to 3
        Maximum number of configuration evaluations. Basically, how many instance-seed keys should be evaluated at
        maximum for a configuration.
    max_incumbents : int, defaults to 10
        How many incumbents to keep track of in the case of multi-objective.
    """
    return Intensifier(
        scenario=scenario,
        max_config_calls=max_config_calls,
        max_incumbents=max_incumbents,
    )
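
For a deterministic target function without instances, a single evaluation per configuration is sufficient, so max_config_calls can be lowered (sketch with the names from the construction sketch):

intensifier = BlackBoxFacade.get_intensifier(scenario, max_config_calls=1)
smac = BlackBoxFacade(scenario, quadratic, intensifier=intensifier, overwrite=True)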

get_kernel staticmethod #

get_kernel(scenario: Scenario) -> Kernel

Returns a kernel for the Gaussian Process surrogate model.

The kernel is a composite of kernels, chosen depending on the hyperparameter types: categorical (HammingKernel), continuous (Matern), plus a noise kernel (White).

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_kernel(scenario: Scenario) -> kernels.Kernel:
    """Returns a kernel for the Gaussian Process surrogate model.

    The kernel is a composite of kernels depending on the type of hyperparameters:
    categorical (HammingKernel), continuous (Matern), and noise kernels (White).
    """
    types, _ = get_types(scenario.configspace, instance_features=None)
    cont_dims = np.where(np.array(types) == 0)[0]
    cat_dims = np.where(np.array(types) != 0)[0]

    if (len(cont_dims) + len(cat_dims)) != len(list(scenario.configspace.values())):
        raise ValueError(
            "The inferred number of continuous and categorical hyperparameters "
            "must equal the total number of hyperparameters. Got "
            f"{(len(cont_dims) + len(cat_dims))} != {len(list(scenario.configspace.values()))}."
        )

    # Constant Kernel
    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(np.exp(-10), np.exp(2)),
        prior=LogNormalPrior(
            mean=0.0,
            sigma=1.0,
            seed=scenario.seed,
        ),
    )

    # Continuous / Categorical Kernels
    exp_kernel, ham_kernel = 0.0, 0.0
    if len(cont_dims) > 0:
        exp_kernel = MaternKernel(
            np.ones([len(cont_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in range(len(cont_dims))],
            nu=2.5,
            operate_on=cont_dims,
        )
    if len(cat_dims) > 0:
        ham_kernel = HammingKernel(
            np.ones([len(cat_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in range(len(cat_dims))],
            operate_on=cat_dims,
        )

    # Noise Kernel
    noise_kernel = WhiteKernel(
        noise_level=1e-8,
        noise_level_bounds=(np.exp(-25), np.exp(2)),
        prior=HorseshoePrior(scale=0.1, seed=scenario.seed),
    )

    # Continuous and categecorical HPs
    if len(cont_dims) > 0 and len(cat_dims) > 0:
        kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel

    # Only continuous HPs
    elif len(cont_dims) > 0 and len(cat_dims) == 0:
        kernel = cov_amp * exp_kernel + noise_kernel

    # Only categorical HPs
    elif len(cont_dims) == 0 and len(cat_dims) > 0:
        kernel = cov_amp * ham_kernel + noise_kernel

    else:
        raise ValueError("The number of continuous and categorical hyperparameters must be greater than zero.")

    return kernel
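
The returned object is a scikit-learn style composite kernel and can be inspected, or adapted, before it is handed to get_model. A small sketch with the scenario from the construction sketch:

kernel = BlackBoxFacade.get_kernel(scenario)
print(kernel)             # for a purely continuous space roughly: ConstantKernel * Matern + WhiteKernel
print(len(kernel.theta))  # number of (log-transformed) kernel hyperparameters
model = BlackBoxFacade.get_model(scenario, kernel=kernel)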

get_model staticmethod #

get_model(
    scenario: Scenario,
    *,
    model_type: str | None = None,
    kernel: Kernel | None = None
) -> AbstractGaussianProcess

Returns a Gaussian Process surrogate model.

Parameters #

scenario : Scenario
model_type : str | None, defaults to None
    Which Gaussian Process model should be chosen. Choose between vanilla and mcmc.
kernel : kernels.Kernel | None, defaults to None
    The kernel used in the surrogate model.

Returns #

model : GaussianProcess | MCMCGaussianProcess
    The instantiated Gaussian Process.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_model(
    scenario: Scenario,
    *,
    model_type: str | None = None,
    kernel: kernels.Kernel | None = None,
) -> AbstractGaussianProcess:
    """Returns a Gaussian Process surrogate model.

    Parameters
    ----------
    scenario : Scenario
    model_type : str | None, defaults to None
        Which Gaussian Process model should be chosen. Choose between `vanilla` and `mcmc`.
    kernel : kernels.Kernel | None, defaults to None
        The kernel used in the surrogate model.

    Returns
    -------
    model : GaussianProcess | MCMCGaussianProcess
        The instantiated gaussian process.
    """
    available_model_types = [None, "vanilla", "mcmc"]
    if model_type not in available_model_types:
        types = [str(t) for t in available_model_types]
        raise ValueError(f"The model_type `{model_type}` is not supported. Choose one of {', '.join(types)}")

    if kernel is None:
        kernel = BlackBoxFacade.get_kernel(scenario=scenario)

    if model_type is None or model_type == "vanilla":
        return GaussianProcess(
            configspace=scenario.configspace,
            kernel=kernel,
            normalize_y=True,
            seed=scenario.seed,
        )
    elif model_type == "mcmc":
        n_mcmc_walkers = 3 * len(kernel.theta)
        if n_mcmc_walkers % 2 == 1:
            n_mcmc_walkers += 1

        return MCMCGaussianProcess(
            configspace=scenario.configspace,
            kernel=kernel,
            n_mcmc_walkers=n_mcmc_walkers,
            chain_length=250,
            burning_steps=250,
            normalize_y=True,
            seed=scenario.seed,
        )
    else:
        raise ValueError("Unknown model type %s" % model_type)

get_multi_objective_algorithm staticmethod #

get_multi_objective_algorithm(
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None
) -> MeanAggregationStrategy

Returns the mean aggregation strategy for the multi-objective algorithm.

Parameters #

scenario : Scenario
objective_weights : list[float] | None, defaults to None
    Weights for averaging the objectives in a weighted manner. Must have the same length as the number of objectives.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_multi_objective_algorithm(  # type: ignore
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None,
) -> MeanAggregationStrategy:
    """Returns the mean aggregation strategy for the multi-objective algorithm.

    Parameters
    ----------
    scenario : Scenario
    objective_weights : list[float] | None, defaults to None
        Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of
        objectives.
    """
    return MeanAggregationStrategy(
        scenario=scenario,
        objective_weights=objective_weights,
    )
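
Sketch for a scenario with two objectives where the second objective is weighted twice as strongly. The objective names, the weights, and the target function quadratic_mo (assumed here to return one cost per declared objective) are illustrative:

def quadratic_mo(config: Configuration, seed: int = 0) -> dict[str, float]:
    # Illustrative: one cost per objective name declared in the scenario
    x = config["x"]
    return {"error": x ** 2, "runtime": abs(x)}

mo_scenario = Scenario(configspace, objectives=["error", "runtime"], n_trials=100)
multi_objective_algorithm = BlackBoxFacade.get_multi_objective_algorithm(
    mo_scenario,
    objective_weights=[1.0, 2.0],
)
smac = BlackBoxFacade(
    mo_scenario,
    quadratic_mo,
    multi_objective_algorithm=multi_objective_algorithm,
    overwrite=True,
)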

get_random_design staticmethod #

get_random_design(
    scenario: Scenario,
    *,
    probability: float = 0.08447232371720552
) -> ProbabilityRandomDesign

Returns ProbabilityRandomDesign for interleaving configurations.

Parameters #

probability : float, defaults to 0.08447232371720552
    Probability that a configuration will be drawn at random.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_random_design(  # type: ignore
    scenario: Scenario,
    *,
    probability: float = 0.08447232371720552,
) -> ProbabilityRandomDesign:
    """Returns ``ProbabilityRandomDesign`` for interleaving configurations.

    Parameters
    ----------
    probability : float, defaults to 0.08447232371720552
        Probability that a configuration will be drawn at random.
    """
    return ProbabilityRandomDesign(seed=scenario.seed, probability=probability)
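
Sketch: interleaving random configurations more aggressively, roughly every fifth suggestion instead of the default of about 8% (names from the construction sketch):

random_design = BlackBoxFacade.get_random_design(scenario, probability=0.2)
smac = BlackBoxFacade(scenario, quadratic, random_design=random_design, overwrite=True)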

get_runhistory_encoder staticmethod #

get_runhistory_encoder(
    scenario: Scenario,
) -> RunHistoryEncoder

Returns the default runhistory encoder.

Source code in smac/facade/blackbox_facade.py
@staticmethod
def get_runhistory_encoder(
    scenario: Scenario,
) -> RunHistoryEncoder:
    """Returns the default runhistory encoder."""
    return RunHistoryEncoder(scenario)

optimize #

optimize(
    *, data_to_scatter: dict[str, Any] | None = None
) -> Configuration | list[Configuration]

Optimizes the configuration of the algorithm.

Parameters #

data_to_scatter : dict[str, Any] | None
    Note that this argument is only used by the dask runner. When a user scatters data from the local process to the distributed network, the data is distributed in a round-robin fashion, grouped by the number of cores. Roughly speaking, the data can then be kept in memory, so it does not have to be (de-)serialized every time the target function is executed with a big dataset. This is useful, for example, when all target function calls share a large dataset.

Returns #

incumbent : Configuration
    Best found configuration.

Source code in smac/facade/abstract_facade.py
def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configuration | list[Configuration]:
    """
    Optimizes the configuration of the algorithm.

    Parameters
    ----------
    data_to_scatter: dict[str, Any] | None
        We first note that this argument is valid only dask_runner!
        When a user scatters data from their local process to the distributed network,
        this data is distributed in a round-robin fashion grouping by number of cores.
        Roughly speaking, we can keep this data in memory and then we do not have to (de-)serialize the data
        every time we would like to execute a target function with a big dataset.
        For example, when your target function has a big dataset shared across all the target function,
        this argument is very useful.

    Returns
    -------
    incumbent : Configuration
        Best found configuration.
    """
    incumbents = None
    if isinstance(data_to_scatter, dict) and len(data_to_scatter) == 0:
        raise ValueError("data_to_scatter must be None or dict with some elements, but got an empty dict.")

    try:
        incumbents = self._optimizer.optimize(data_to_scatter=data_to_scatter)
    finally:
        self._optimizer.save()

    return incumbents
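
A hedged sketch of data_to_scatter: it only takes effect when SMAC runs through the DaskParallelRunner (scenario.n_workers > 1 or an explicit dask_client), and the scattered entries are assumed to be forwarded to the target function as additional keyword arguments. The argument name X and the dataset are illustrative:

import numpy as np

def train(config: Configuration, seed: int = 0, X: np.ndarray | None = None) -> float:
    # X arrives via the scattered data instead of being (de-)serialized for every trial
    return float(np.mean((X - config["x"]) ** 2))

parallel_scenario = Scenario(configspace, n_trials=100, n_workers=4)
smac = BlackBoxFacade(parallel_scenario, train, overwrite=True)
incumbent = smac.optimize(data_to_scatter={"X": np.random.rand(100_000)})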

tell #

tell(
    info: TrialInfo, value: TrialValue, save: bool = True
) -> None

Adds the result of a trial to the runhistory and updates the intensifier.

Parameters #

info : TrialInfo
    Describes the trial from which to process the results.
value : TrialValue
    Contains relevant information about the execution of a trial.
save : bool, defaults to True
    Whether the runhistory should be saved.

Source code in smac/facade/abstract_facade.py
def tell(self, info: TrialInfo, value: TrialValue, save: bool = True) -> None:
    """Adds the result of a trial to the runhistory and updates the intensifier.

    Parameters
    ----------
    info: TrialInfo
        Describes the trial from which to process the results.
    value: TrialValue
        Contains relevant information regarding the execution of a trial.
    save : bool, optional to True
        Whether the runhistory should be saved.
    """
    return self._optimizer.tell(info, value, save=save)
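
Together with ask, this enables a manual optimization loop in which the trials are executed by the caller (sketch reusing scenario and quadratic from the construction sketch; TrialValue comes from smac.runhistory.dataclasses):

from smac.runhistory.dataclasses import TrialValue

smac = BlackBoxFacade(scenario, quadratic, overwrite=True)
for _ in range(20):
    info = smac.ask()                              # TrialInfo with config, instance, seed, budget
    cost = quadratic(info.config, seed=info.seed)  # evaluate the trial ourselves
    smac.tell(info, TrialValue(cost=cost))         # feed the result back to SMAC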

validate #

validate(
    config: Configuration, *, seed: int | None = None
) -> float | list[float]

Validates a configuration on seeds different from the ones used during the optimization process, and on the highest budget (if the budget type is real-valued).

Parameters #

config : Configuration
    Configuration to validate.
instances : list[str] | None, defaults to None
    Which instances to validate. If None, all instances specified in the scenario are used. If the budget type is real-valued, this argument is ignored.
seed : int | None, defaults to None
    If None, the seed from the scenario is used.

Returns #

cost : float | list[float]
    The averaged cost of the configuration. In the case of multi-fidelity, the cost of each objective is averaged.

Source code in smac/facade/abstract_facade.py
def validate(
    self,
    config: Configuration,
    *,
    seed: int | None = None,
) -> float | list[float]:
    """Validates a configuration on seeds different from the ones used in the optimization process and on the
    highest budget (if budget type is real-valued).

    Parameters
    ----------
    config : Configuration
        Configuration to validate
    instances : list[str] | None, defaults to None
        Which instances to validate. If None, all instances specified in the scenario are used.
        In case that the budget type is real-valued, this argument is ignored.
    seed : int | None, defaults to None
        If None, the seed from the scenario is used.

    Returns
    -------
    cost : float | list[float]
        The averaged cost of the configuration. In case of multi-fidelity, the cost of each objective is
        averaged.
    """
    return self._optimizer.validate(config, seed=seed)
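
Typical use once the optimization has finished (sketch with the names from the construction sketch):

incumbent = smac.optimize()
validation_cost = smac.validate(incumbent)  # re-evaluates the incumbent on unseen seeds
print(f"Validated cost of the incumbent: {validation_cost}")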