Multi-Layer Perceptron Using Multiple Epochs#
Description#
An example of optimizing a multi-layer perceptron (MLP) with multiple budgets (examples/2_multi_fidelity/1_mlp_epochs.py). Since we want to take advantage of multi-fidelity optimization, the MultiFidelityFacade is a good choice. By default, the MultiFidelityFacade internally runs with Hyperband (https://arxiv.org/abs/1603.06560) as the intensifier, which combines an aggressive racing mechanism with successive halving. Crucially, the target function must accept a budget parameter that specifies how much fidelity SMAC wants to allocate to the given configuration. In this example, we use both SuccessiveHalving and Hyperband to compare the results.
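To make the budget allocation concrete, here is a minimal sketch of the geometric budget schedule that successive halving follows. The reduction factor eta=3 is an assumption (it matches SMAC's default); min_budget=1 and max_budget=25 come from the scenario below.

import numpy as np

# Minimal sketch of a successive-halving budget schedule, assuming a
# reduction factor eta=3 (SMAC's default). Configurations are raced on
# geometrically increasing budgets; only the best 1/eta advance per rung.
min_budget, max_budget, eta = 1, 25, 3

# Number of rungs that fit between min_budget and max_budget.
n_rungs = int(np.floor(np.log(max_budget / min_budget) / np.log(eta))) + 1

# Budget (epochs) at each rung, ending at max_budget.
budgets = [max_budget / eta ** (n_rungs - 1 - k) for k in range(n_rungs)]
print(budgets)  # approximately [2.78, 8.33, 25.0]

Hyperband then runs several such brackets, each starting from a different minimum rung, to hedge against choosing a poor minimum budget.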
Since the MLP is a deep neural network, we choose the number of training epochs as the fidelity type. This means budget specifies how many epochs SMAC wants to allocate to a configuration. We use the digits dataset and optimize the average accuracy of a 5-fold cross-validation.
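Schematically, the only multi-fidelity-specific part of the target function is that it accepts a budget keyword argument and maps it to a number of epochs. The hypothetical sketch below illustrates just that contract; the real train method appears in full in the code further down.

import numpy as np

def train_sketch(config: dict, seed: int = 0, budget: float = 25) -> float:
    # SMAC passes a per-trial budget; here it is interpreted as epochs.
    # Budgets may arrive as floats, hence the ceiling before use.
    n_epochs = int(np.ceil(budget))
    # ... fit a model for n_epochs and compute its validation accuracy ...
    accuracy = 0.9  # placeholder value for illustration only
    return 1 - accuracy  # SMAC minimizes, so return an error/cost

print(train_sketch({}, budget=8.33))  # the real code would train for 9 epochs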
Note
This example uses the MultiFidelityFacade, which is the implementation closest to BOHB (https://github.com/automl/HpBandSter).
import warnings

import matplotlib.pyplot as plt
import numpy as np
from ConfigSpace import (
    Categorical,
    Configuration,
    ConfigurationSpace,
    EqualsCondition,
    Float,
    InCondition,
    Integer,
)
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neural_network import MLPClassifier

from smac import MultiFidelityFacade as MFFacade
from smac import Scenario
from smac.facade import AbstractFacade
from smac.intensifier.hyperband import Hyperband
from smac.intensifier.successive_halving import SuccessiveHalving

__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"

dataset = load_digits()
class MLP:
    @property
    def configspace(self) -> ConfigurationSpace:
        # Build Configuration Space which defines all parameters and their ranges.
        # To illustrate different parameter types, we use continuous, integer and categorical parameters.
        cs = ConfigurationSpace()

        n_layer = Integer("n_layer", (1, 5), default=1)
        n_neurons = Integer("n_neurons", (8, 256), log=True, default=10)
        activation = Categorical("activation", ["logistic", "tanh", "relu"], default="tanh")
        solver = Categorical("solver", ["lbfgs", "sgd", "adam"], default="adam")
        batch_size = Integer("batch_size", (30, 300), default=200)
        learning_rate = Categorical("learning_rate", ["constant", "invscaling", "adaptive"], default="constant")
        learning_rate_init = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True)

        # Add all hyperparameters at once:
        cs.add([n_layer, n_neurons, activation, solver, batch_size, learning_rate, learning_rate_init])

        # Adding conditions to restrict the hyperparameter space...
        # ... since learning rate is only used when solver is 'sgd'.
        use_lr = EqualsCondition(child=learning_rate, parent=solver, value="sgd")
        # ... since learning rate initialization will only be accounted for when using 'sgd' or 'adam'.
        use_lr_init = InCondition(child=learning_rate_init, parent=solver, values=["sgd", "adam"])
        # ... since batch size will not be considered when optimizer is 'lbfgs'.
        use_batch_size = InCondition(child=batch_size, parent=solver, values=["sgd", "adam"])

        # We can also add multiple conditions on hyperparameters at once:
        cs.add([use_lr, use_batch_size, use_lr_init])

        return cs
    def train(self, config: Configuration, seed: int = 0, budget: int = 25) -> float:
        # For deactivated parameters (by virtue of the conditions),
        # the configuration stores None-values.
        # This is not accepted by the MLP, so we replace them with placeholder values.
        lr = config.get("learning_rate", "constant")
        lr_init = config.get("learning_rate_init", 0.001)
        batch_size = config.get("batch_size", 200)

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")

            classifier = MLPClassifier(
                hidden_layer_sizes=[config["n_neurons"]] * config["n_layer"],
                solver=config["solver"],
                batch_size=batch_size,
                activation=config["activation"],
                learning_rate=lr,
                learning_rate_init=lr_init,
                max_iter=int(np.ceil(budget)),
                random_state=seed,
            )

            # Compute the 5-fold cross-validation accuracy; we return the error (1 - mean accuracy)
            cv = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)  # to make CV splits consistent
            score = cross_val_score(classifier, dataset.data, dataset.target, cv=cv, error_score="raise")

        return 1 - np.mean(score)
def plot_trajectory(facades: list[AbstractFacade]) -> None:
    """Plots the trajectory (incumbents) of the optimization process."""
    plt.figure()
    plt.title("Trajectory")
    plt.xlabel("Wallclock time [s]")
    plt.ylabel(facades[0].scenario.objectives)
    plt.ylim(0, 0.4)

    for facade in facades:
        X, Y = [], []
        for item in facade.intensifier.trajectory:
            # Single-objective optimization
            assert len(item.config_ids) == 1
            assert len(item.costs) == 1

            y = item.costs[0]
            x = item.walltime

            X.append(x)
            Y.append(y)

        plt.plot(X, Y, label=facade.intensifier.__class__.__name__)
        plt.scatter(X, Y, marker="x")

    plt.legend()
    plt.show()
if __name__ == "__main__":
    mlp = MLP()

    facades: list[AbstractFacade] = []
    for intensifier_object in [SuccessiveHalving, Hyperband]:
        # Define our environment variables
        scenario = Scenario(
            mlp.configspace,
            walltime_limit=60,  # After 60 seconds, we stop the hyperparameter optimization
            n_trials=500,  # Evaluate max 500 different trials
            min_budget=1,  # Train the MLP using a hyperparameter configuration for at least 1 epoch
            max_budget=25,  # Train the MLP using a hyperparameter configuration for at most 25 epochs
            n_workers=8,
        )

        # We want to run five random configurations before starting the optimization.
        initial_design = MFFacade.get_initial_design(scenario, n_configs=5)

        # Create our intensifier
        intensifier = intensifier_object(scenario, incumbent_selection="highest_budget")

        # Create our SMAC object and pass the scenario and the train method
        smac = MFFacade(
            scenario,
            mlp.train,
            initial_design=initial_design,
            intensifier=intensifier,
            overwrite=True,
        )

        # Let's optimize
        incumbent = smac.optimize()

        # Get cost of default configuration
        default_cost = smac.validate(mlp.configspace.get_default_configuration())
        print(f"Default cost ({intensifier.__class__.__name__}): {default_cost}")

        # Let's calculate the cost of the incumbent
        incumbent_cost = smac.validate(incumbent)
        print(f"Incumbent cost ({intensifier.__class__.__name__}): {incumbent_cost}")

        facades.append(smac)

    # Let's plot it
    plot_trajectory(facades)