User Priors over the Optimum
Description
An example for optimizing a Multi-Layer Perceptron (MLP) while placing priors over the optimum on its hyperparameters (the full script is examples/1_basics/6_priors.py). These priors are derived from user knowledge, such as previous runs on similar tasks, common knowledge, or intuition gained from manual tuning. To create the priors, we make use of the Normal and Beta hyperparameters, as well as the "weights" attribute of the CategoricalHyperparameter. This can be integrated into the optimization of any SMAC facade, but here we stick to the hyperparameter optimization facade. To incorporate user priors into the optimization, you have to change the acquisition function to PriorAcquisitionFunction.
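Before the full script, here is a minimal, self-contained sketch (not part of the example) of what such priors encode: ConfigSpace samples according to the declared distributions, so the Beta prior and the categorical weights already show up in plain random draws. The space, seed, and sample size below are illustrative only.

from collections import Counter

import numpy as np
from ConfigSpace import (
    BetaIntegerHyperparameter,
    CategoricalHyperparameter,
    ConfigurationSpace,
)

cs = ConfigurationSpace(seed=0)
cs.add(
    [
        # Beta(4, 2) in log space skews draws towards wider networks
        BetaIntegerHyperparameter("n_neurons", lower=8, upper=256, alpha=4, beta=2, log=True),
        # weights=[1, 1, 3] makes "relu" roughly 60% of the draws
        CategoricalHyperparameter("activation", ["logistic", "tanh", "relu"], weights=[1, 1, 3]),
    ]
)

samples = cs.sample_configuration(size=2000)
print("mean n_neurons:", np.mean([c["n_neurons"] for c in samples]))  # skewed towards larger widths
print("activation counts:", Counter(c["activation"] for c in samples))  # roughly 60% relu

The full example follows.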
import warnings
import numpy as np
from ConfigSpace import (
BetaIntegerHyperparameter,
CategoricalHyperparameter,
Configuration,
ConfigurationSpace,
NormalFloatHyperparameter,
UniformIntegerHyperparameter,
UniformFloatHyperparameter,
)
from sklearn.datasets import load_digits
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neural_network import MLPClassifier
from smac import HyperparameterOptimizationFacade, Scenario
from smac.acquisition.function import PriorAcquisitionFunction
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"
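# Digits dataset: 1,797 8x8 grayscale images of handwritten digits (10 classes)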
digits = load_digits()
class MLP:
    @property
    def prior_configspace(self) -> ConfigurationSpace:
        # Build Configuration Space which defines all parameters and their ranges.
        # To illustrate different parameter types,
        # we use continuous, integer and categorical parameters.
        cs = ConfigurationSpace()

        # We do not have an educated belief on the number of layers beforehand
        # As such, the prior on the HP is uniform
        n_layer = UniformIntegerHyperparameter(
            "n_layer",
            lower=1,
            upper=5,
        )

        # We believe the optimal network is likely going to be relatively wide,
        # and place a Beta prior skewed towards wider networks in log space
        n_neurons = BetaIntegerHyperparameter(
            "n_neurons",
            lower=8,
            upper=256,
            alpha=4,
            beta=2,
            log=True,
        )

        # We believe that ReLU is likely going to be the optimal activation function about
        # 60% of the time, and thus place weight on that accordingly
        activation = CategoricalHyperparameter(
            "activation",
            ["logistic", "tanh", "relu"],
            weights=[1, 1, 3],
            default_value="relu",
        )

        # Moreover, we believe ADAM is the most likely optimizer
        optimizer = CategoricalHyperparameter(
            "optimizer",
            ["sgd", "adam"],
            weights=[1, 2],
            default_value="adam",
        )

        # We do not have an educated opinion on the batch size, and thus leave it as-is
        batch_size = UniformIntegerHyperparameter(
            "batch_size",
            16,
            512,
            default_value=128,
        )

        # We place a log-normal prior on the learning rate, so that it is centered on 10^-3,
        # with one unit of standard deviation per multiple of 10 (in log space)
        learning_rate_init = NormalFloatHyperparameter(
            "learning_rate_init",
            lower=1e-5,
            upper=1.0,
            mu=1e-3,  # will be transformed to log space later
            sigma=10,  # will be transformed to log space later
            log=True,
        )

        # Add all hyperparameters at once:
        cs.add(
            [n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]
        )

        return cs
    @property
    def configspace(self) -> ConfigurationSpace:
        # Build Configuration Space which defines all parameters and their ranges.
        # To illustrate different parameter types,
        # we use continuous, integer and categorical parameters.
        cs = ConfigurationSpace()

        # We do not have an educated belief on the number of layers beforehand
        n_layer = UniformIntegerHyperparameter(
            "n_layer",
            lower=1,
            upper=5,
        )

        # Define network width without a specific prior
        n_neurons = UniformIntegerHyperparameter(
            "n_neurons",
            lower=8,
            upper=256,
        )

        # Define activation functions without specific weights
        activation = CategoricalHyperparameter(
            "activation",
            ["logistic", "tanh", "relu"],
            default_value="relu",
        )

        # Define optimizer without specific weights
        optimizer = CategoricalHyperparameter(
            "optimizer",
            ["sgd", "adam"],
            default_value="adam",
        )

        # Define batch size without specific distribution
        batch_size = UniformIntegerHyperparameter(
            "batch_size",
            16,
            512,
            default_value=128,
        )

        # Define learning rate range without log-normal prior
        learning_rate_init = UniformFloatHyperparameter(
            "learning_rate_init",
            lower=1e-5,
            upper=1.0,
            default_value=1e-3,
        )

        # Add all hyperparameters at once:
        cs.add(
            [n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]
        )

        return cs
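    # Unlike `prior_configspace`, the space above encodes no beliefs about the optimum;
    # it is the prior-free counterpart, useful e.g. for comparison runs without priors.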
    def train(self, config: Configuration, seed: int = 0) -> float:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=ConvergenceWarning)

            classifier = MLPClassifier(
                hidden_layer_sizes=[config["n_neurons"]] * config["n_layer"],
                solver=config["optimizer"],
                batch_size=config["batch_size"],
                activation=config["activation"],
                learning_rate_init=config["learning_rate_init"],
                random_state=seed,
                max_iter=5,
            )

            # Compute the 5-fold cross-validation accuracy
            cv = StratifiedKFold(
                n_splits=5, random_state=seed, shuffle=True
            )  # to make CV splits consistent
            score = cross_val_score(
                classifier, digits.data, digits.target, cv=cv, error_score="raise"
            )

        # SMAC minimizes, so return 1 - mean accuracy (the classification error)
        return 1 - np.mean(score)
if __name__ == "__main__":
    mlp = MLP()
    # Use the prior-weighted space throughout, so that the priors defined above
    # are actually used by the PriorAcquisitionFunction below.
    default_config = mlp.prior_configspace.get_default_configuration()

    # Define our environment variables
    scenario = Scenario(mlp.prior_configspace, n_trials=40)
    # We also want to include our default configuration in the initial design
    initial_design = HyperparameterOptimizationFacade.get_initial_design(
        scenario,
        additional_configs=[default_config],
    )
    # We define the prior acquisition function, which conducts the optimization using priors over the optimum
    acquisition_function = PriorAcquisitionFunction(
        acquisition_function=HyperparameterOptimizationFacade.get_acquisition_function(
            scenario
        ),
        decay_beta=scenario.n_trials / 10,  # Proven solid value
    )
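    # Roughly speaking, PriorAcquisitionFunction follows the piBO scheme (Hvarfner et al., 2022):
    # the wrapped acquisition value is multiplied by the prior density raised to a power that
    # shrinks with the number of evaluated trials (controlled by decay_beta), so the user prior
    # dominates early on and the data-driven surrogate takes over later.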
    # We only want one config call (use only one seed in this example)
    intensifier = HyperparameterOptimizationFacade.get_intensifier(
        scenario,
        max_config_calls=1,
    )

    # Create our SMAC object and pass the scenario and the train method
    smac = HyperparameterOptimizationFacade(
        scenario,
        mlp.train,
        initial_design=initial_design,
        acquisition_function=acquisition_function,
        intensifier=intensifier,
        overwrite=True,
    )
    incumbent = smac.optimize()

    # Get cost of default configuration
    default_cost = smac.validate(default_config)
    print(f"Default cost: {default_cost}")

    # Let's calculate the cost of the incumbent
    incumbent_cost = smac.validate(incumbent)
    print(f"Incumbent cost: {incumbent_cost}")