A Worker for Example 5 - PyTorch
This example implements a small CNN trained on MNIST with PyTorch. The configuration space covers the most common hyperparameter types and even contains conditional dependencies.
We optimize the following hyperparameters:
| Parameter name | Parameter type | Range/Choices | Comment |
|---|---|---|---|
| Learning rate | float | [1e-6, 1e-2] | varied logarithmically |
| Optimizer | categorical | {Adam, SGD} | discrete choice |
| SGD momentum | float | [0, 0.99] | only active if the optimizer is SGD |
| Number of conv layers | integer | [1, 3] | can only take the integer values 1, 2, or 3 |
| Number of filters in the first conv layer | integer | [4, 64] | logarithmically varied integer |
| Number of filters in the second conv layer | integer | [4, 64] | only active if the number of layers is >= 2 |
| Number of filters in the third conv layer | integer | [4, 64] | only active if the number of layers is == 3 |
| Dropout rate | float | [0, 0.9] | standard continuous parameter |
| Number of hidden units in the fully connected layer | integer | [8, 256] | logarithmically varied integer |
See the get_configspace method below for how these hyperparameters are defined with the ConfigSpace package, and how the conditional ones are attached to their parent parameters.
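As a quick, self-contained illustration of how such a conditional dependency behaves (a minimal sketch using only the optimizer/momentum pair; the full search space is built in get_configspace below), note that an inactive hyperparameter is simply absent from a sampled configuration dictionary:

```python
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

cs = CS.ConfigurationSpace()
optimizer = CSH.CategoricalHyperparameter('optimizer', ['Adam', 'SGD'])
sgd_momentum = CSH.UniformFloatHyperparameter('sgd_momentum', lower=0.0, upper=0.99, default_value=0.9)
cs.add_hyperparameters([optimizer, sgd_momentum])

# 'sgd_momentum' is only active when 'SGD' is the sampled optimizer
cs.add_condition(CS.EqualsCondition(sgd_momentum, optimizer, 'SGD'))

config = cs.sample_configuration().get_dictionary()
# If 'Adam' was sampled, the key 'sgd_momentum' does not appear in config at all,
# which is why the compute method below guards optional keys with checks like
# "'num_filters_2' in config".
print(config)
```

The worker's compute method uses the same pattern to cope with the conditional filter counts of the second and third convolutional layers.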
The network does not perform well when a random configuration is sampled, but it should reach >90% accuracy after a few iterations. To speed up training, only 8192 images are used for training and 1024 for validation. The goal is not to reach state-of-the-art performance on MNIST, but to show how to use PyTorch inside HpBandSter and to demonstrate a more complicated search space. A sketch of running this worker in a full BOHB optimization is shown after the code.
try:
    import torch
    import torch.utils.data
    import torch.nn as nn
    import torch.nn.functional as F
except ImportError:
    raise ImportError("For this example you need to install pytorch.")

try:
    import torchvision
    import torchvision.transforms as transforms
except ImportError:
    raise ImportError("For this example you need to install torchvision.")
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
from hpbandster.core.worker import Worker
import logging
logging.basicConfig(level=logging.DEBUG)
class PyTorchWorker(Worker):
    def __init__(self, N_train=8192, N_valid=1024, **kwargs):
        super().__init__(**kwargs)

        batch_size = 64

        # Load the MNIST data here
        train_dataset = torchvision.datasets.MNIST(root='../../data', train=True, transform=transforms.ToTensor(), download=True)
        test_dataset = torchvision.datasets.MNIST(root='../../data', train=False, transform=transforms.ToTensor())

        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(range(N_train))
        validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(range(N_train, N_train + N_valid))

        self.train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, sampler=train_sampler)
        self.validation_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=1024, sampler=validation_sampler)
        self.test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1024, shuffle=False)
    def compute(self, config, budget, working_directory, *args, **kwargs):
        """
        Simple example of a compute function that trains a small CNN on MNIST.

        The input parameter "config" (dictionary) contains the sampled configuration
        passed by the BOHB optimizer; "budget" is interpreted as the number of epochs.
        """
        # device = torch.device('cpu')
        model = MNISTConvNet(num_conv_layers=config['num_conv_layers'],
                             num_filters_1=config['num_filters_1'],
                             num_filters_2=config['num_filters_2'] if 'num_filters_2' in config else None,
                             num_filters_3=config['num_filters_3'] if 'num_filters_3' in config else None,
                             dropout_rate=config['dropout_rate'],
                             num_fc_units=config['num_fc_units'],
                             kernel_size=3
                             )

        if config['optimizer'] == 'Adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=config['lr'], momentum=config['sgd_momentum'])

        for epoch in range(int(budget)):
            model.train()
            for i, (x, y) in enumerate(self.train_loader):
                optimizer.zero_grad()
                output = model(x)
                # the model returns log-probabilities, so the negative log-likelihood loss is used directly
                loss = F.nll_loss(output, y)
                loss.backward()
                optimizer.step()

        train_accuracy = self.evaluate_accuracy(model, self.train_loader)
        validation_accuracy = self.evaluate_accuracy(model, self.validation_loader)
        test_accuracy = self.evaluate_accuracy(model, self.test_loader)

        return ({
            'loss': 1 - validation_accuracy,  # remember: HpBandSter always minimizes!
            'info': {'test accuracy': test_accuracy,
                     'train accuracy': train_accuracy,
                     'validation accuracy': validation_accuracy,
                     'number of parameters': model.number_of_parameters(),
                     }
        })
    def evaluate_accuracy(self, model, data_loader):
        model.eval()
        correct = 0
        with torch.no_grad():
            for x, y in data_loader:
                output = model(x)
                # get the index of the max log-probability
                pred = output.max(1, keepdim=True)[1]
                correct += pred.eq(y.view_as(pred)).sum().item()
        accuracy = correct / len(data_loader.sampler)
        return accuracy
    @staticmethod
    def get_configspace():
        """
        Builds the configuration space with the needed hyperparameters.

        It is easily possible to implement different types of hyperparameters.
        Besides float hyperparameters on a log scale, it also handles categorical
        and conditional hyperparameters.

        :return: ConfigurationSpace object
        """
        cs = CS.ConfigurationSpace()

        lr = CSH.UniformFloatHyperparameter('lr', lower=1e-6, upper=1e-2, default_value=1e-2, log=True)

        # For demonstration purposes, we add different optimizers as categorical hyperparameters.
        # To show how to use conditional hyperparameters with ConfigSpace, we'll add the optimizers 'Adam' and 'SGD'.
        # SGD has an additional parameter 'momentum'.
        optimizer = CSH.CategoricalHyperparameter('optimizer', ['Adam', 'SGD'])

        sgd_momentum = CSH.UniformFloatHyperparameter('sgd_momentum', lower=0.0, upper=0.99, default_value=0.9, log=False)

        cs.add_hyperparameters([lr, optimizer, sgd_momentum])

        # The hyperparameter sgd_momentum will only be active if the configuration
        # contains 'SGD' as the optimizer.
        cond = CS.EqualsCondition(sgd_momentum, optimizer, 'SGD')
        cs.add_condition(cond)

        num_conv_layers = CSH.UniformIntegerHyperparameter('num_conv_layers', lower=1, upper=3, default_value=2)

        num_filters_1 = CSH.UniformIntegerHyperparameter('num_filters_1', lower=4, upper=64, default_value=16, log=True)
        num_filters_2 = CSH.UniformIntegerHyperparameter('num_filters_2', lower=4, upper=64, default_value=16, log=True)
        num_filters_3 = CSH.UniformIntegerHyperparameter('num_filters_3', lower=4, upper=64, default_value=16, log=True)

        cs.add_hyperparameters([num_conv_layers, num_filters_1, num_filters_2, num_filters_3])

        # You can also use inequality conditions:
        cond = CS.GreaterThanCondition(num_filters_2, num_conv_layers, 1)
        cs.add_condition(cond)

        cond = CS.GreaterThanCondition(num_filters_3, num_conv_layers, 2)
        cs.add_condition(cond)

        dropout_rate = CSH.UniformFloatHyperparameter('dropout_rate', lower=0.0, upper=0.9, default_value=0.5, log=False)
        num_fc_units = CSH.UniformIntegerHyperparameter('num_fc_units', lower=8, upper=256, default_value=32, log=True)

        cs.add_hyperparameters([dropout_rate, num_fc_units])

        return cs
class MNISTConvNet(torch.nn.Module):
    def __init__(self, num_conv_layers, num_filters_1, num_filters_2, num_filters_3, dropout_rate, num_fc_units, kernel_size):
        super().__init__()

        self.conv1 = nn.Conv2d(1, num_filters_1, kernel_size=kernel_size)
        self.conv2 = None
        self.conv3 = None

        # spatial size after one 'valid' convolution followed by 2x2 max pooling
        output_size = (28 - kernel_size + 1) // 2
        num_output_filters = num_filters_1

        if num_conv_layers > 1:
            self.conv2 = nn.Conv2d(num_filters_1, num_filters_2, kernel_size=kernel_size)
            num_output_filters = num_filters_2
            output_size = (output_size - kernel_size + 1) // 2

        if num_conv_layers > 2:
            self.conv3 = nn.Conv2d(num_filters_2, num_filters_3, kernel_size=kernel_size)
            num_output_filters = num_filters_3
            output_size = (output_size - kernel_size + 1) // 2

        self.dropout = nn.Dropout(p=dropout_rate)

        self.conv_output_size = num_output_filters * output_size * output_size

        self.fc1 = nn.Linear(self.conv_output_size, num_fc_units)
        self.fc2 = nn.Linear(num_fc_units, 10)

    def forward(self, x):
        # switched order of pooling and relu compared to the original example
        # to make it identical to the keras worker
        # seems to also give better accuracies
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)

        if self.conv2 is not None:
            x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        if self.conv3 is not None:
            x = F.max_pool2d(F.relu(self.conv3(x)), 2)

        x = self.dropout(x)

        x = x.view(-1, self.conv_output_size)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)

        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

    def number_of_parameters(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
if __name__ == "__main__":
    worker = PyTorchWorker(run_id='0')
    cs = worker.get_configspace()

    config = cs.sample_configuration().get_dictionary()
    print(config)

    res = worker.compute(config=config, budget=2, working_directory='.')
    print(res)
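The __main__ block above only evaluates a single randomly sampled configuration, which is useful for debugging the worker locally. To actually optimize the hyperparameters, the worker has to be registered with one of HpBandSter's optimizers. The following is a minimal sketch of a local BOHB run; the run id, budgets, and number of iterations are arbitrary illustrative values, and it assumes `PyTorchWorker` from this file is in scope.

```python
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB

# Start a local nameserver that the worker and the optimizer use to find each other
NS = hpns.NameServer(run_id='example5_pytorch', host='127.0.0.1', port=None)
NS.start()

# Run the worker defined above in the background of this process
w = PyTorchWorker(run_id='example5_pytorch', nameserver='127.0.0.1')
w.run(background=True)

# BOHB samples configurations from the worker's config space and assigns budgets (epochs here)
bohb = BOHB(configspace=PyTorchWorker.get_configspace(),
            run_id='example5_pytorch', nameserver='127.0.0.1',
            min_budget=1, max_budget=3)
res = bohb.run(n_iterations=4)

bohb.shutdown(shutdown_workers=True)
NS.shutdown()

# Inspect the best configuration found so far
id2config = res.get_id2config_mapping()
incumbent = res.get_incumbent_id()
print('Best found configuration:', id2config[incumbent]['config'])
```

Because the budget passed to compute is the number of training epochs, BOHB first evaluates many configurations for a single epoch and only trains the more promising ones for the full three epochs.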