ParEGO
展开以复制 examples/3_multi_objective/2_parego.py(右上角)
from __future__ import annotations
import time
import warnings
import matplotlib.pyplot as plt
import numpy as np
from ConfigSpace import (
Categorical,
Configuration,
ConfigurationSpace,
EqualsCondition,
Float,
InCondition,
Integer,
)
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neural_network import MLPClassifier
from smac import HyperparameterOptimizationFacade as HPOFacade
from smac import Scenario
from smac.facade.abstract_facade import AbstractFacade
from smac.multi_objective.parego import ParEGO
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"
# Load the digits dataset once at module level; MLP.train reads digits.data and
# digits.target for every cross-validation run.
digits = load_digits()
class MLP:
    """Target function wrapper: an sklearn MLP classifier evaluated on the digits dataset."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Build the hyperparameter search space of the MLP.

        A fresh ``ConfigurationSpace`` is constructed on every access.

        Returns
        -------
        ConfigurationSpace
            Space with seven hyperparameters, three of which are conditional on ``solver``.
        """
        space = ConfigurationSpace()

        # Network architecture and optimizer choice
        depth = Integer("n_layer", (1, 5), default=1)
        width = Integer("n_neurons", (8, 256), log=True, default=10)
        activation_fn = Categorical("activation", ["logistic", "tanh", "relu"], default="tanh")
        optimizer = Categorical("solver", ["lbfgs", "sgd", "adam"], default="adam")

        # Hyperparameters that are tied to specific solvers by the conditions below
        minibatch = Integer("batch_size", (30, 300), default=200)
        lr_schedule = Categorical("learning_rate", ["constant", "invscaling", "adaptive"], default="constant")
        initial_lr = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True)

        space.add([depth, width, activation_fn, optimizer, minibatch, lr_schedule, initial_lr])

        # Conditions are added in one call: the learning-rate schedule depends on solver == "sgd",
        # batch size and initial learning rate depend on solver being "sgd" or "adam".
        space.add(
            [
                EqualsCondition(child=lr_schedule, parent=optimizer, value="sgd"),
                InCondition(child=minibatch, parent=optimizer, values=["sgd", "adam"]),
                InCondition(child=initial_lr, parent=optimizer, values=["sgd", "adam"]),
            ]
        )

        return space

    def train(self, config: Configuration, seed: int = 0, budget: int = 10) -> dict[str, float]:
        """Cross-validate an MLP built from ``config`` and report its error and runtime.

        Parameters
        ----------
        config : Configuration
            Hyperparameter values. ``learning_rate``, ``learning_rate_init`` and ``batch_size``
            fall back to fixed defaults when they are missing from the configuration.
        seed : int, defaults to 0
            Used as ``random_state`` for both the classifier and the CV splitter.
        budget : int, defaults to 10
            Rounded up to an integer and used as ``max_iter`` of the classifier.

        Returns
        -------
        dict[str, float]
            ``"1 - accuracy"``: one minus the mean of the 5-fold cross-validation scores.
            ``"time"``: seconds elapsed between the start of the evaluation and the return.
        """
        schedule = config.get("learning_rate", "constant")
        step_size = config.get("learning_rate_init", 0.001)
        n_batch = config.get("batch_size", 200)

        started = time.time()

        with warnings.catch_warnings():
            # Silence every warning raised while building and fitting the classifier
            warnings.filterwarnings("ignore")

            mlp_kwargs = {
                "hidden_layer_sizes": [config["n_neurons"]] * config["n_layer"],
                "solver": config["solver"],
                "batch_size": n_batch,
                "activation": config["activation"],
                "learning_rate": schedule,
                "learning_rate_init": step_size,
                "max_iter": int(np.ceil(budget)),
                "random_state": seed,
            }
            model = MLPClassifier(**mlp_kwargs)

            # Seeded, shuffled 5-fold splitter so that the CV splits are consistent for a given seed
            folds = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)
            fold_scores = cross_val_score(model, digits.data, digits.target, cv=folds, error_score="raise")

        error = 1 - np.mean(fold_scores)
        elapsed = time.time() - started
        return {"1 - accuracy": error, "time": elapsed}
def plot_pareto(smac: AbstractFacade, incumbents: list[Configuration]) -> None:
    """Plot the averaged costs of all evaluated configurations and highlight the incumbents.

    Every configuration in the runhistory is drawn as a cross in the plane spanned by the
    first two objectives; incumbents are drawn in red and connected by a dotted step line.

    Parameters
    ----------
    smac : AbstractFacade
        Facade whose ``runhistory`` supplies the configurations and their average costs and
        whose ``scenario.objectives`` supplies the axis labels.
    incumbents : list[Configuration]
        Configurations to highlight as the Pareto front.
    """
    average_costs = []
    average_pareto_costs = []
    for config in smac.runhistory.get_configs():
        # Since we use multiple seeds, we have to average them to get only one cost value pair for each configuration
        average_cost = smac.runhistory.average_cost(config)
        if config in incumbents:
            average_pareto_costs.append(average_cost)
        else:
            average_costs.append(average_cost)

    # np.vstack raises a ValueError on an empty list, which happens when every evaluated
    # configuration is an incumbent (or none is). Fall back to an empty two-column array.
    costs = np.vstack(average_costs) if average_costs else np.empty((0, 2))
    pareto_costs = np.vstack(average_pareto_costs) if average_pareto_costs else np.empty((0, 2))
    pareto_costs = pareto_costs[pareto_costs[:, 0].argsort()]  # Sort by the first objective

    costs_x, costs_y = costs[:, 0], costs[:, 1]
    pareto_costs_x, pareto_costs_y = pareto_costs[:, 0], pareto_costs[:, 1]

    plt.scatter(costs_x, costs_y, marker="x", label="Configuration")
    plt.scatter(pareto_costs_x, pareto_costs_y, marker="x", c="r", label="Incumbent")

    # The step line needs at least one incumbent to anchor it
    if pareto_costs_x.size:
        # Outer bounds come from the non-incumbents when there are any; otherwise the
        # incumbents themselves delimit the plot.
        bound_x = np.max(costs_x) if costs_x.size else np.max(pareto_costs_x)
        bound_y = np.max(costs_y) if costs_y.size else np.max(pareto_costs_y)
        plt.step(
            [pareto_costs_x[0]] + pareto_costs_x.tolist() + [bound_x],  # We add bounds
            [bound_y] + pareto_costs_y.tolist() + [np.min(pareto_costs_y)],  # We add bounds
            where="post",
            linestyle=":",
        )

    plt.title("Pareto-Front")
    plt.xlabel(smac.scenario.objectives[0])
    plt.ylabel(smac.scenario.objectives[1])
    plt.legend()
    plt.show()
if __name__ == "__main__":
    mlp = MLP()
    objectives = ["1 - accuracy", "time"]

    # Describe the optimization problem together with its resource limits
    scenario = Scenario(
        mlp.configspace,
        objectives=objectives,
        walltime_limit=30,  # Stop the hyperparameter optimization after 30 seconds
        n_trials=200,  # Evaluate at most 200 trials
        n_workers=1,
    )

    # Five configurations are requested from the initial design before the optimization starts
    random_start = HPOFacade.get_initial_design(scenario, n_configs=5)
    parego = ParEGO(scenario)
    # Limit the intensifier to two calls per configuration
    racing = HPOFacade.get_intensifier(scenario, max_config_calls=2)

    # Assemble the facade from the scenario, the target function and the components above
    facade = HPOFacade(
        scenario,
        mlp.train,
        initial_design=random_start,
        multi_objective_algorithm=parego,
        intensifier=racing,
        overwrite=True,
    )

    # Run the optimization; the result is the list of incumbents
    incumbents = facade.optimize()

    # Validate the default configuration as a reference point
    default_config = mlp.configspace.get_default_configuration()
    default_cost = facade.validate(default_config)
    print(f"Validated costs from default config: \n--- {default_cost}\n")

    print("Validated costs from the Pareto front (incumbents):")
    for incumbent in incumbents:
        print("---", facade.validate(incumbent))

    # Finally, draw the Pareto front
    plot_pareto(facade, incumbents)
描述
本示例展示如何使用 ParEGO 进行多目标优化。我们使用 MLP 在 digits 数据集上同时优化准确率和运行时间,并将各配置的结果绘制在图中,其中最佳配置以帕累托前沿的形式高亮显示。红色十字表示 SMAC 选出的最佳配置。
在优化过程中,SMAC 会在两个不同的随机种子下评估每个配置。因此,图中显示的是每个配置在各随机种子上取平均后的代价(1 - 准确率)和运行时间。
from __future__ import annotations
import time
import warnings
import matplotlib.pyplot as plt
import numpy as np
from ConfigSpace import (
Categorical,
Configuration,
ConfigurationSpace,
EqualsCondition,
Float,
InCondition,
Integer,
)
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.neural_network import MLPClassifier
from smac import HyperparameterOptimizationFacade as HPOFacade
from smac import Scenario
from smac.facade.abstract_facade import AbstractFacade
from smac.multi_objective.parego import ParEGO
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"
# Load the digits dataset once at module level; MLP.train reads digits.data and
# digits.target for every cross-validation run.
digits = load_digits()
class MLP:
    """Target function wrapper: an sklearn MLP classifier evaluated on the digits dataset."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Build the hyperparameter search space of the MLP.

        A fresh ``ConfigurationSpace`` is constructed on every access.

        Returns
        -------
        ConfigurationSpace
            Space with seven hyperparameters, three of which are conditional on ``solver``.
        """
        space = ConfigurationSpace()

        # Network architecture and optimizer choice
        depth = Integer("n_layer", (1, 5), default=1)
        width = Integer("n_neurons", (8, 256), log=True, default=10)
        activation_fn = Categorical("activation", ["logistic", "tanh", "relu"], default="tanh")
        optimizer = Categorical("solver", ["lbfgs", "sgd", "adam"], default="adam")

        # Hyperparameters that are tied to specific solvers by the conditions below
        minibatch = Integer("batch_size", (30, 300), default=200)
        lr_schedule = Categorical("learning_rate", ["constant", "invscaling", "adaptive"], default="constant")
        initial_lr = Float("learning_rate_init", (0.0001, 1.0), default=0.001, log=True)

        space.add([depth, width, activation_fn, optimizer, minibatch, lr_schedule, initial_lr])

        # Conditions are added in one call: the learning-rate schedule depends on solver == "sgd",
        # batch size and initial learning rate depend on solver being "sgd" or "adam".
        space.add(
            [
                EqualsCondition(child=lr_schedule, parent=optimizer, value="sgd"),
                InCondition(child=minibatch, parent=optimizer, values=["sgd", "adam"]),
                InCondition(child=initial_lr, parent=optimizer, values=["sgd", "adam"]),
            ]
        )

        return space

    def train(self, config: Configuration, seed: int = 0, budget: int = 10) -> dict[str, float]:
        """Cross-validate an MLP built from ``config`` and report its error and runtime.

        Parameters
        ----------
        config : Configuration
            Hyperparameter values. ``learning_rate``, ``learning_rate_init`` and ``batch_size``
            fall back to fixed defaults when they are missing from the configuration.
        seed : int, defaults to 0
            Used as ``random_state`` for both the classifier and the CV splitter.
        budget : int, defaults to 10
            Rounded up to an integer and used as ``max_iter`` of the classifier.

        Returns
        -------
        dict[str, float]
            ``"1 - accuracy"``: one minus the mean of the 5-fold cross-validation scores.
            ``"time"``: seconds elapsed between the start of the evaluation and the return.
        """
        schedule = config.get("learning_rate", "constant")
        step_size = config.get("learning_rate_init", 0.001)
        n_batch = config.get("batch_size", 200)

        started = time.time()

        with warnings.catch_warnings():
            # Silence every warning raised while building and fitting the classifier
            warnings.filterwarnings("ignore")

            mlp_kwargs = {
                "hidden_layer_sizes": [config["n_neurons"]] * config["n_layer"],
                "solver": config["solver"],
                "batch_size": n_batch,
                "activation": config["activation"],
                "learning_rate": schedule,
                "learning_rate_init": step_size,
                "max_iter": int(np.ceil(budget)),
                "random_state": seed,
            }
            model = MLPClassifier(**mlp_kwargs)

            # Seeded, shuffled 5-fold splitter so that the CV splits are consistent for a given seed
            folds = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)
            fold_scores = cross_val_score(model, digits.data, digits.target, cv=folds, error_score="raise")

        error = 1 - np.mean(fold_scores)
        elapsed = time.time() - started
        return {"1 - accuracy": error, "time": elapsed}
def plot_pareto(smac: AbstractFacade, incumbents: list[Configuration]) -> None:
    """Plot the averaged costs of all evaluated configurations and highlight the incumbents.

    Every configuration in the runhistory is drawn as a cross in the plane spanned by the
    first two objectives; incumbents are drawn in red and connected by a dotted step line.

    Parameters
    ----------
    smac : AbstractFacade
        Facade whose ``runhistory`` supplies the configurations and their average costs and
        whose ``scenario.objectives`` supplies the axis labels.
    incumbents : list[Configuration]
        Configurations to highlight as the Pareto front.
    """
    average_costs = []
    average_pareto_costs = []
    for config in smac.runhistory.get_configs():
        # Since we use multiple seeds, we have to average them to get only one cost value pair for each configuration
        average_cost = smac.runhistory.average_cost(config)
        if config in incumbents:
            average_pareto_costs.append(average_cost)
        else:
            average_costs.append(average_cost)

    # np.vstack raises a ValueError on an empty list, which happens when every evaluated
    # configuration is an incumbent (or none is). Fall back to an empty two-column array.
    costs = np.vstack(average_costs) if average_costs else np.empty((0, 2))
    pareto_costs = np.vstack(average_pareto_costs) if average_pareto_costs else np.empty((0, 2))
    pareto_costs = pareto_costs[pareto_costs[:, 0].argsort()]  # Sort by the first objective

    costs_x, costs_y = costs[:, 0], costs[:, 1]
    pareto_costs_x, pareto_costs_y = pareto_costs[:, 0], pareto_costs[:, 1]

    plt.scatter(costs_x, costs_y, marker="x", label="Configuration")
    plt.scatter(pareto_costs_x, pareto_costs_y, marker="x", c="r", label="Incumbent")

    # The step line needs at least one incumbent to anchor it
    if pareto_costs_x.size:
        # Outer bounds come from the non-incumbents when there are any; otherwise the
        # incumbents themselves delimit the plot.
        bound_x = np.max(costs_x) if costs_x.size else np.max(pareto_costs_x)
        bound_y = np.max(costs_y) if costs_y.size else np.max(pareto_costs_y)
        plt.step(
            [pareto_costs_x[0]] + pareto_costs_x.tolist() + [bound_x],  # We add bounds
            [bound_y] + pareto_costs_y.tolist() + [np.min(pareto_costs_y)],  # We add bounds
            where="post",
            linestyle=":",
        )

    plt.title("Pareto-Front")
    plt.xlabel(smac.scenario.objectives[0])
    plt.ylabel(smac.scenario.objectives[1])
    plt.legend()
    plt.show()
if __name__ == "__main__":
    mlp = MLP()
    objectives = ["1 - accuracy", "time"]

    # Describe the optimization problem together with its resource limits
    scenario = Scenario(
        mlp.configspace,
        objectives=objectives,
        walltime_limit=30,  # Stop the hyperparameter optimization after 30 seconds
        n_trials=200,  # Evaluate at most 200 trials
        n_workers=1,
    )

    # Five configurations are requested from the initial design before the optimization starts
    random_start = HPOFacade.get_initial_design(scenario, n_configs=5)
    parego = ParEGO(scenario)
    # Limit the intensifier to two calls per configuration
    racing = HPOFacade.get_intensifier(scenario, max_config_calls=2)

    # Assemble the facade from the scenario, the target function and the components above
    facade = HPOFacade(
        scenario,
        mlp.train,
        initial_design=random_start,
        multi_objective_algorithm=parego,
        intensifier=racing,
        overwrite=True,
    )

    # Run the optimization; the result is the list of incumbents
    incumbents = facade.optimize()

    # Validate the default configuration as a reference point
    default_config = mlp.configspace.get_default_configuration()
    default_cost = facade.validate(default_config)
    print(f"Validated costs from default config: \n--- {default_cost}\n")

    print("Validated costs from the Pareto front (incumbents):")
    for incumbent in incumbents:
        print("---", facade.validate(incumbent))

    # Finally, draw the Pareto front
    plot_pareto(facade, incumbents)