跳至内容

算法配置外观

smac.facade.algorithm_configuration_facade #

AlgorithmConfigurationFacade #

AlgorithmConfigurationFacade(
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction
    | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer
    | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm
    | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder
    | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int
    | Path
    | Literal[False]
    | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None
)

基类:AbstractFacade

源代码位于 smac/facade/abstract_facade.py
def __init__(
    self,
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] | None = None,
    overwrite: bool = False,
    dask_client: Client | None = None,
):
    """Wire up all components of the facade and create the optimizer.

    Every component that is passed as ``None`` is replaced by the result of the
    matching ``get_*`` method of this facade, called with ``scenario``.

    Parameters
    ----------
    scenario : Scenario
        Used to build the default components. Its configuration space is seeded
        with ``scenario.seed`` and ``scenario.n_workers`` decides whether the
        runner is wrapped for parallel execution.
    target_function : Callable | str | AbstractRunner
        An ``AbstractRunner`` is used as is, a string is wrapped in a
        ``TargetFunctionScriptRunner`` and anything else in a
        ``TargetFunctionRunner``.
    model : AbstractModel | None, defaults to None
    acquisition_function : AbstractAcquisitionFunction | None, defaults to None
    acquisition_maximizer : AbstractAcquisitionMaximizer | None, defaults to None
    initial_design : AbstractInitialDesign | None, defaults to None
    random_design : AbstractRandomDesign | None, defaults to None
    intensifier : AbstractIntensifier | None, defaults to None
    multi_objective_algorithm : AbstractMultiObjectiveAlgorithm | None, defaults to None
        A default is only created when ``scenario.count_objectives()`` is
        greater than one; otherwise ``None`` is kept.
    runhistory_encoder : AbstractRunHistoryEncoder | None, defaults to None
    config_selector : ConfigSelector | None, defaults to None
    logging_level : int | Path | Literal[False] | None, defaults to None
        Passed unchanged to ``setup_logging``.
    callbacks : list[Callback] | None, defaults to None
        Callbacks registered on the optimizer. ``None`` is treated as an empty list.
    overwrite : bool, defaults to False
        Stored on the facade as ``_overwrite``.
    dask_client : Client | None, defaults to None
        When given (or when ``scenario.n_workers`` is greater than one), the
        runner is wrapped in a ``DaskParallelRunner``.
    """
    setup_logging(logging_level)

    if callbacks is None:
        callbacks = []

    if model is None:
        model = self.get_model(scenario)

    if acquisition_function is None:
        acquisition_function = self.get_acquisition_function(scenario)

    if acquisition_maximizer is None:
        acquisition_maximizer = self.get_acquisition_maximizer(scenario)

    if initial_design is None:
        initial_design = self.get_initial_design(scenario)

    if random_design is None:
        random_design = self.get_random_design(scenario)

    if intensifier is None:
        intensifier = self.get_intensifier(scenario)

    # A multi-objective algorithm is only needed when there is more than one objective
    if multi_objective_algorithm is None and scenario.count_objectives() > 1:
        multi_objective_algorithm = self.get_multi_objective_algorithm(scenario=scenario)

    if runhistory_encoder is None:
        runhistory_encoder = self.get_runhistory_encoder(scenario)

    if config_selector is None:
        config_selector = self.get_config_selector(scenario)

    # Start from an empty runhistory
    runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm)

    # Seed the configuration space with the scenario seed
    scenario.configspace.seed(scenario.seed)

    # Keep all components on the instance
    self._scenario = scenario
    self._model = model
    self._acquisition_function = acquisition_function
    self._acquisition_maximizer = acquisition_maximizer
    self._initial_design = initial_design
    self._random_design = random_design
    self._intensifier = intensifier
    self._multi_objective_algorithm = multi_objective_algorithm
    self._runhistory = runhistory
    self._runhistory_encoder = runhistory_encoder
    self._config_selector = config_selector
    self._callbacks = callbacks
    self._overwrite = overwrite

    # Prepare the runner that executes the target function
    runner: AbstractRunner
    if isinstance(target_function, AbstractRunner):
        runner = target_function
    elif isinstance(target_function, str):
        runner = TargetFunctionScriptRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )
    else:
        runner = TargetFunctionRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )

    # In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
    if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
        if dask_client is not None and n_workers > 1:
            logger.warning(
                "Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
            )
        else:
            available_workers = joblib.cpu_count()
            if n_workers > available_workers:
                # Log the capped value, not the originally requested one
                logger.info(f"Workers are reduced to {available_workers}.")
                # NOTE(review): the local value is not passed on below; confirm that
                # DaskParallelRunner applies the same cap on its own.
                n_workers = available_workers

        # We use a dask runner for parallelization
        runner = DaskParallelRunner(single_worker=runner, dask_client=dask_client)

    # Keep the runner on the instance
    self._runner = runner

    # Add the dependencies between the components
    self._update_dependencies()

    # Update the meta data of the scenario (basically the arguments of the components)
    self._scenario._set_meta(self.meta)

    # Validate that the composition of the components is correct and makes sense
    self._validate()

    # Finally, configure the optimizer
    self._optimizer = self._get_optimizer()
    assert self._optimizer

    # Register the user callbacks
    for callback in callbacks:
        self._optimizer.register_callback(callback)

    # Additionally, register the runhistory callback of the intensifier at the front (index 0) so that the
    # incumbent is updated every time new information is available
    self._optimizer.register_callback(self._intensifier.get_callback(), index=0)

intensifier property #

intensifier: AbstractIntensifier

该外观所使用的强化器。

meta property #

meta: dict[str, Any]

根据外观的所有组件生成哈希值。用于运行名称或确定是否应继续运行。

optimizer property #

optimizer: SMBO

负责 BO 循环的优化器。跟踪状态等有用信息。

runhistory property #

runhistory: RunHistory

在优化过程中填充所有试验的运行历史。

scenario property #

scenario: Scenario

包含所有环境信息的场景对象。

ask #

ask() -> TrialInfo

向强化器请求下一次试验。

源代码位于 smac/facade/abstract_facade.py
def ask(self) -> TrialInfo:
    """Return the next trial by delegating to the optimizer's ``ask``.

    Returns
    -------
    TrialInfo
        The value returned by ``self._optimizer.ask()``.
    """
    next_trial = self._optimizer.ask()
    return next_trial

get_acquisition_function staticmethod #

get_acquisition_function(
    scenario: Scenario, *, xi: float = 0.0
) -> EI

返回预期改进采集函数。

参数#

scenario : Scenario xi : float, 默认为 0.0 控制采集函数的探索与利用之间的平衡。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_acquisition_function(  # type: ignore
    scenario: Scenario,
    *,
    xi: float = 0.0,
) -> EI:
    """Create the default acquisition function, Expected Improvement.

    Parameters
    ----------
    scenario : Scenario
        Not read by this implementation.
    xi : float, defaults to 0.0
        Forwarded to ``EI``. Controls the balance between exploration and
        exploitation of the acquisition function.

    Returns
    -------
    EI
        A new ``EI`` instance built with the given ``xi``.
    """
    expected_improvement = EI(xi=xi)
    return expected_improvement

get_acquisition_maximizer staticmethod #

get_acquisition_maximizer(
    scenario: Scenario,
) -> LocalAndSortedRandomSearch

返回局部和排序的随机搜索作为采集最大化器。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_acquisition_maximizer(  # type: ignore
    scenario: Scenario,
) -> LocalAndSortedRandomSearch:
    """Create the default acquisition maximizer, a local and sorted random search.

    Parameters
    ----------
    scenario : Scenario
        Supplies the configuration space and the seed for the maximizer.

    Returns
    -------
    LocalAndSortedRandomSearch
        A new maximizer over ``scenario.configspace`` seeded with ``scenario.seed``.
    """
    return LocalAndSortedRandomSearch(scenario.configspace, seed=scenario.seed)

get_config_selector staticmethod #

get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16
) -> ConfigSelector

返回默认配置选择器。

源代码位于 smac/facade/abstract_facade.py
@staticmethod
def get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16,
) -> ConfigSelector:
    """Create the default configuration selector.

    Parameters
    ----------
    scenario : Scenario
        Passed to ``ConfigSelector`` as its first positional argument.
    retrain_after : int, defaults to 8
        Forwarded to ``ConfigSelector`` unchanged.
    retries : int, defaults to 16
        Forwarded to ``ConfigSelector`` unchanged.
    """
    selector_options = {"retrain_after": retrain_after, "retries": retries}
    return ConfigSelector(scenario, **selector_options)

get_initial_design staticmethod #

get_initial_design(
    scenario: Scenario,
    *,
    additional_configs: list[Configuration] = None
) -> DefaultInitialDesign

返回一个初始设计,该设计返回默认配置。

参数#

additional_configs: list[Configuration] | None, 默认为 None(视为空列表) 将额外配置添加到初始设计中。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_initial_design(  # type: ignore
    scenario: Scenario,
    *,
    additional_configs: list[Configuration] | None = None,
) -> DefaultInitialDesign:
    """Returns an initial design, which returns the default configuration.

    Parameters
    ----------
    scenario : Scenario
        Passed on to ``DefaultInitialDesign``.
    additional_configs : list[Configuration] | None, defaults to None
        Adds additional configurations to the initial design. ``None`` is
        treated as an empty list.
    """
    return DefaultInitialDesign(
        scenario=scenario,
        # A fresh list per call avoids sharing a mutable default between calls
        additional_configs=[] if additional_configs is None else additional_configs,
    )

get_intensifier staticmethod #

get_intensifier(
    scenario: Scenario,
    *,
    max_config_calls: int = 2000,
    max_incumbents: int = 10
) -> Intensifier

返回 Intensifier 作为强化器。支持预算。

参数#

max_config_calls : int, 默认为 2000 最大配置评估次数。基本上,对于一个配置,最多应该评估多少个实例-种子键。 max_incumbents : int, 默认为 10 在多目标情况下跟踪多少个最佳配置。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_intensifier(
    scenario: Scenario,
    *,
    max_config_calls: int = 2000,
    max_incumbents: int = 10,
) -> Intensifier:
    """Returns ``Intensifier`` as intensifier. Supports budgets.

    Parameters
    ----------
    scenario : Scenario
        Passed on to ``Intensifier``.
    max_config_calls : int, defaults to 2000
        Maximum number of configuration evaluations. Basically, how many instance-seed keys should be evaluated at
        maximum for a configuration.
    max_incumbents : int, defaults to 10
        How many incumbents to keep track of in the case of multi-objective.
    """
    return Intensifier(
        scenario=scenario,
        max_config_calls=max_config_calls,
        max_incumbents=max_incumbents,
    )

get_model staticmethod #

get_model(
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 5.0 / 6.0,
    min_samples_split: int = 3,
    min_samples_leaf: int = 3,
    max_depth: int = 20,
    bootstrapping: bool = True,
    pca_components: int = 4
) -> RandomForest

返回随机森林作为代理模型。

参数#

n_trees : int, 默认为 10 随机森林中的树数量。 ratio_features : float, 默认为 5.0 / 6.0 用于分裂时考虑的特征比例。 min_samples_split : int, 默认为 3 进行分裂所需的最少数据点数。 min_samples_leaf : int, 默认为 3 叶子中的最少数据点数。 max_depth : int, 默认为 20 单棵树的最大深度。 bootstrapping : bool, 默认为 True 启用自举。 pca_components : int, 默认为 4 使用 PCA 降低实例特征维度时保留的组件数。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_model(  # type: ignore
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 5.0 / 6.0,
    min_samples_split: int = 3,
    min_samples_leaf: int = 3,
    max_depth: int = 20,
    bootstrapping: bool = True,
    pca_components: int = 4,
) -> RandomForest:
    """Returns a random forest as surrogate model.

    Parameters
    ----------
    scenario : Scenario
        Supplies the configuration space, the instance features and the seed of the model.
    n_trees : int, defaults to 10
        The number of trees in the random forest.
    ratio_features : float, defaults to 5.0 / 6.0
        The ratio of features that are considered for splitting.
    min_samples_split : int, defaults to 3
        The minimum number of data points to perform a split.
    min_samples_leaf : int, defaults to 3
        The minimum number of data points in a leaf.
    max_depth : int, defaults to 20
        The maximum depth of a single tree.
    bootstrapping : bool, defaults to True
        Enables bootstrapping.
    pca_components : int, defaults to 4
        Number of components to keep when using PCA to reduce dimensionality of instance features.
    """
    return RandomForest(
        configspace=scenario.configspace,
        n_trees=n_trees,
        ratio_features=ratio_features,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_depth=max_depth,
        bootstrapping=bootstrapping,
        # This facade always builds the forest with ``log_y`` switched off
        log_y=False,
        instance_features=scenario.instance_features,
        pca_components=pca_components,
        seed=scenario.seed,
    )

get_multi_objective_algorithm staticmethod #

get_multi_objective_algorithm(
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None
) -> MeanAggregationStrategy

返回多目标算法的平均聚合策略。

参数#

scenario : Scenario objective_weights : list[float] | None, 默认为 None 用于加权平均目标的权重。必须与目标数量长度相同。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_multi_objective_algorithm(  # type: ignore
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None,
) -> MeanAggregationStrategy:
    """Create the default multi-objective algorithm, a mean aggregation strategy.

    Parameters
    ----------
    scenario : Scenario
        Passed on to ``MeanAggregationStrategy``.
    objective_weights : list[float] | None, defaults to None
        Weights for averaging the objectives in a weighted manner. Must be of
        the same length as the number of objectives.
    """
    aggregation = MeanAggregationStrategy(scenario=scenario, objective_weights=objective_weights)
    return aggregation

get_random_design staticmethod #

get_random_design(
    scenario: Scenario, *, probability: float = 0.5
) -> ProbabilityRandomDesign

返回 ProbabilityRandomDesign 用于交织配置。

参数#

probability : float, 默认为 0.5 配置被随机抽取的概率。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_random_design(  # type: ignore
    scenario: Scenario,
    *,
    probability: float = 0.5,
) -> ProbabilityRandomDesign:
    """Create the default random design, a ``ProbabilityRandomDesign``, for interleaving configurations.

    Parameters
    ----------
    scenario : Scenario
        Only its ``seed`` is read, to seed the random design.
    probability : float, defaults to 0.5
        Probability that a configuration will be drawn at random.
    """
    random_design = ProbabilityRandomDesign(probability=probability, seed=scenario.seed)
    return random_design

get_runhistory_encoder staticmethod #

get_runhistory_encoder(
    scenario: Scenario,
) -> RunHistoryEncoder

返回默认的运行历史编码器。

源代码位于 smac/facade/algorithm_configuration_facade.py
@staticmethod
def get_runhistory_encoder(scenario: Scenario) -> RunHistoryEncoder:
    """Create the default runhistory encoder for the given scenario.

    Parameters
    ----------
    scenario : Scenario
        Passed to ``RunHistoryEncoder`` as its only argument.
    """
    encoder = RunHistoryEncoder(scenario)
    return encoder

optimize #

optimize(
    *, data_to_scatter: dict[str, Any] | None = None
) -> Configuration | list[Configuration]

优化算法的配置。

参数#

data_to_scatter: dict[str, Any] | None 首先需要注意的是,此参数仅对 dask_runner 有效!当用户将数据从其本地进程分散到分布式网络时,这些数据会以轮询方式按核心数量分组进行分发。粗略地说,我们可以将这些数据保留在内存中,这样每次要使用大数据集执行目标函数时,就无需(反)序列化数据。例如,当您的目标函数具有在所有目标函数之间共享的大数据集时,此参数非常有用。

返回#

incumbent : Configuration | list[Configuration] 找到的最佳配置。

源代码位于 smac/facade/abstract_facade.py
def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configuration | list[Configuration]:
    """
    Run the optimizer and return what it found; the optimizer is saved afterwards in any case.

    Parameters
    ----------
    data_to_scatter: dict[str, Any] | None
        Note that this argument is only valid for the dask runner.
        When a user scatters data from their local process to the distributed network,
        this data is distributed in a round-robin fashion, grouped by number of cores.
        Roughly speaking, the data can be kept in memory so that it does not have to be
        (de-)serialized every time a target function is executed with a big dataset.
        This is useful, for example, when one big dataset is shared across all target
        function calls.

    Returns
    -------
    incumbent : Configuration | list[Configuration]
        Best found configuration(s), as returned by the optimizer.

    Raises
    ------
    ValueError
        If ``data_to_scatter`` is an empty dict.
    """
    # Reject an empty dict up front; nothing is run or saved in that case
    scatter_is_empty_dict = isinstance(data_to_scatter, dict) and len(data_to_scatter) == 0
    if scatter_is_empty_dict:
        raise ValueError("data_to_scatter must be None or dict with some elements, but got an empty dict.")

    try:
        return self._optimizer.optimize(data_to_scatter=data_to_scatter)
    finally:
        # Runs on success and on failure alike
        self._optimizer.save()

tell #

tell(
    info: TrialInfo, value: TrialValue, save: bool = True
) -> None

将试验结果添加到运行历史并更新强化器。

参数#

info: TrialInfo 描述要处理结果的试验。 value: TrialValue 包含有关试验执行的相关信息。 save : bool, 可选,默认为 True 是否应保存运行历史。

源代码位于 smac/facade/abstract_facade.py
def tell(self, info: TrialInfo, value: TrialValue, save: bool = True) -> None:
    """Report the result of a trial by delegating to the optimizer's ``tell``.

    Parameters
    ----------
    info : TrialInfo
        Describes the trial from which to process the results.
    value : TrialValue
        Contains relevant information regarding the execution of a trial.
    save : bool, defaults to True
        Whether the runhistory should be saved.
    """
    outcome = self._optimizer.tell(info, value, save=save)
    return outcome

validate #

validate(
    config: Configuration, *, seed: int | None = None
) -> float | list[float]

在与优化过程中使用的种子不同且在最高预算下(如果预算类型为实数值)验证配置。

参数#

config : Configuration 要验证的配置 seed : int | None, 默认为 None 如果为 None,则使用场景中的种子。

返回#

cost : float | list[float] 配置的平均成本。在多保真度情况下,对每个目标的成本进行平均。

源代码位于 smac/facade/abstract_facade.py
def validate(
    self,
    config: Configuration,
    *,
    seed: int | None = None,
) -> float | list[float]:
    """Validates a configuration on seeds different from the ones used in the optimization process and on the
    highest budget (if budget type is real-valued).

    The call is delegated to the optimizer's ``validate``.

    Parameters
    ----------
    config : Configuration
        Configuration to validate
    seed : int | None, defaults to None
        If None, the seed from the scenario is used.

    Returns
    -------
    cost : float | list[float]
        The averaged cost of the configuration. In case of multi-fidelity, the cost of each objective is
        averaged.
    """
    return self._optimizer.validate(config, seed=seed)