用于准备数据以训练代理模型的抽象类。
参数
scenario : 场景对象。
considered_states : list[StatusType],默认为 [StatusType.SUCCESS, StatusType.CRASHED, StatusType.MEMORYOUT]
    仅考虑状态属于此列表的试验。
lower_budget_states : list[StatusType],默认为 []
    对于预算小于当前预算的试验,还会额外考虑具有这些状态的所有试验。
scale_percentage : int,默认为 5
    缩放的 y 转换使用该百分位数来估计到最优值的距离。仅在某些子类中使用。
seed : int | None,默认为 None
    随机种子。为 None 时使用 scenario.seed。
引发
TypeError 如果未给出成功状态。
源代码位于 smac/runhistory/encoder/abstract_encoder.py 中
def __init__(
    self,
    scenario: Scenario,
    considered_states: list[StatusType] | None = None,
    lower_budget_states: list[StatusType] | None = None,
    scale_percentage: int = 5,
    seed: int | None = None,
) -> None:
    """Initialize the encoder from a scenario and the trial-status filters.

    Parameters
    ----------
    scenario : Scenario
        Provides the fallback seed, the number of objectives, the trial
        walltime limit, the instances and their features, and the
        configuration space.
    considered_states : list[StatusType] | None, defaults to None
        Stored as ``_considered_states``. ``None`` is replaced by
        ``[StatusType.SUCCESS, StatusType.CRASHED, StatusType.MEMORYOUT]``.
    lower_budget_states : list[StatusType] | None, defaults to None
        Stored as ``_lower_budget_states``. ``None`` is replaced by an empty
        list.
    scale_percentage : int, defaults to 5
        Stored as ``_scale_percentage``; it is not used by the constructor
        itself.
    seed : int | None, defaults to None
        Seed of the internal ``np.random.RandomState``. ``None`` falls back
        to ``scenario.seed``.
    """
    if considered_states is None:
        considered_states = [
            StatusType.SUCCESS,
            StatusType.CRASHED,
            StatusType.MEMORYOUT,
        ]

    if seed is None:
        seed = scenario.seed

    self._seed = seed
    self._rng = np.random.RandomState(seed)
    self._scale_percentage = scale_percentage
    self._n_objectives = scenario.count_objectives()
    self._algorithm_walltime_limit = scenario.trial_walltime_limit
    self._lower_budget_states = lower_budget_states if lower_budget_states is not None else []
    self._considered_states = considered_states

    self._instances = scenario.instances
    self._instance_features = scenario.instance_features
    self._n_features = scenario.count_instance_features()
    self._n_params = len(list(scenario.configspace.values()))

    if self._instances is not None and self._n_features == 0:
        # The message must be ONE string: any extra positional argument is
        # treated by `logging` as a %-format argument, and since the message
        # has no placeholders that would produce a logging error instead of
        # the warning.
        logger.warning(
            "We strongly encourage to use instance features when using instances. "
            "If no instance features are passed, the runhistory encoder can not distinguish between different "
            "instances and therefore returns the same data points with different values, all of which are "
            "used to train the surrogate model.\n"
            "Consider using instance indices as features."
        )

    # Learned statistics: one slot per objective, NaN until they are computed.
    self._min_y = np.array([np.nan] * self._n_objectives)
    self._max_y = np.array([np.nan] * self._n_objectives)
    self._percentile = np.array([np.nan] * self._n_objectives)

    # Both are attached after construction (they start out unset).
    self._multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None
    self._runhistory: RunHistory | None = None
|
meta 属性
返回创建对象的元数据。
返回
dict[str, Any]: 创建对象的元数据:名称、考虑的状态、较低预算状态、scale_percentage、seed。
multi_objective_algorithm 属性
可写
runhistory 属性
可写
用于转换数据的运行历史 (RunHistory)。
get_configurations
get_configurations(
budget_subset: list | None = None,
) -> ndarray
返回配置的向量表示。
警告
实例特征未附加,且成本值未考虑在内。
参数
budget_subset : list[int|float] | None,默认为 None。要考虑的预算列表。
返回
configs_array : np.ndarray
源代码位于 smac/runhistory/encoder/abstract_encoder.py 中
def get_configurations(
    self,
    budget_subset: list | None = None,
) -> np.ndarray:
    """Return the configurations of all relevant trials as an array.

    The ids are collected from the considered trials and from the timeout
    trials, de-duplicated, resolved to configurations through the run
    history, and converted with ``convert_configurations_to_array``.

    Warning
    -------
    Instance features are not appended and cost values are not taken into
    account.

    Parameters
    ----------
    budget_subset : list[int|float] | None, defaults to none
        List of budgets to consider; forwarded unchanged to the trial
        selection helpers.

    Returns
    -------
    configs_array : np.ndarray
    """
    considered_ids = {trial.config_id for trial in self._get_considered_trials(budget_subset)}
    timeout_ids = {trial.config_id for trial in self._get_timeout_trials(budget_subset)}

    # A configuration may appear in both groups; the union keeps each id once.
    unique_ids = considered_ids.union(timeout_ids)

    return convert_configurations_to_array(
        [self.runhistory._ids_config[unique_id] for unique_id in unique_ids]
    )
|
transform
返回运行历史 (RunHistory) 的向量表示。
参数
budget_subset : list | None,默认为 None。要考虑的预算列表。
返回
X : np.ndarray 配置向量和实例特征。 Y : np.ndarray 成本值。
源代码位于 smac/runhistory/encoder/abstract_encoder.py 中
def transform(
    self,
    budget_subset: list | None = None,
) -> tuple[np.ndarray, np.ndarray]:
    """Return a vector representation of the RunHistory.

    Two matrices are built with ``_build_matrix``: one from the considered
    trials and one from the timeout trials. If there are no considered
    trials, only the timeout data is returned; otherwise both are stacked
    (considered rows first).

    Parameters
    ----------
    budget_subset : list | None, defaults to none
        List of budgets to consider.

    Returns
    -------
    X : np.ndarray
        Configuration vector and instance features.
    Y : np.ndarray
        Cost values.
    """
    logger.debug("Transforming RunHistory into X, y format...")

    considered_trials = self._get_considered_trials(budget_subset)
    X, Y = self._build_matrix(trials=considered_trials, store_statistics=True)

    # Get real TIMEOUT runs
    timeout_trials = self._get_timeout_trials(budget_subset)

    # Only ask the timeout pass to store statistics when `_min_y` still
    # contains NaN after the first pass.
    # NOTE(review): presumably `store_statistics` makes `_build_matrix`
    # update the learned min/max/percentile values - confirm in `_build_matrix`.
    store_statistics = bool(np.any(np.isnan(self._min_y)))
    tX, tY = self._build_matrix(trials=timeout_trials, store_statistics=store_statistics)

    # If we don't have successful runs, we have to return all timeout runs
    if not considered_trials:
        return tX, tY

    # Otherwise the timeout data is appended below the considered data.
    X = np.vstack((X, tX))
    Y = np.concatenate((Y, tY))

    # Lazy logging arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("Converted %d observations.", X.shape[0])
    return X, Y
|
transform_response_values 抽象方法
转换函数响应值。
参数
values : np.ndarray 要转换的响应值。
返回
transformed_values : np.ndarray
源代码位于 smac/runhistory/encoder/abstract_encoder.py 中
@abstractmethod
def transform_response_values(self, values: np.ndarray) -> np.ndarray:
    """Transform function response values.

    Abstract hook: this base implementation only raises
    ``NotImplementedError``; the actual transformation has to be provided
    by an overriding implementation.

    Parameters
    ----------
    values : np.ndarray
        Response values to be transformed.

    Returns
    -------
    transformed_values : np.ndarray
    """
    raise NotImplementedError()
|