Worker for Example 5 - Keras

This example implements a small CNN in Keras and trains it on MNIST. The configuration space demonstrates the most common types of hyperparameters and even contains conditional dependencies.

We will optimize the following hyperparameters:

Parameter name                    Type         Range/choices   Comment
Learning rate                     float        [1e-6, 1e-1]    varied on a log scale
Optimizer                         categorical  {Adam, SGD}     discrete choice
SGD momentum                      float        [0, 0.99]       only active if optimizer == SGD
Number of conv layers             integer      [1, 3]          can only take the integer values 1, 2, or 3
Filters in the first conv layer   integer      [4, 64]         integer values varied on a log scale
Filters in the second conv layer  integer      [4, 64]         only active if num_conv_layers >= 2
Filters in the third conv layer   integer      [4, 64]         only active if num_conv_layers == 3
Dropout rate                      float        [0, 0.9]        standard continuous parameter
Hidden units in the FC layer      integer      [8, 256]        integer values varied on a log scale
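
The conditional entries above are the interesting part: a hyperparameter such as sgd_momentum only appears in a sampled configuration when its parent condition holds. As a minimal standalone sketch (separate from the full worker below; the example values in the comment are purely illustrative), this is how the mechanism behaves in ConfigSpace:

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

cs = CS.ConfigurationSpace()
optimizer = CSH.CategoricalHyperparameter('optimizer', ['Adam', 'SGD'])
sgd_momentum = CSH.UniformFloatHyperparameter('sgd_momentum', lower=0.0, upper=0.99)
cs.add_hyperparameters([optimizer, sgd_momentum])

# sgd_momentum is only active while 'SGD' is the chosen optimizer
cs.add_condition(CS.EqualsCondition(sgd_momentum, optimizer, 'SGD'))

# inactive parameters are omitted from the sampled dictionary, e.g.
# {'optimizer': 'SGD', 'sgd_momentum': 0.42}  or  {'optimizer': 'Adam'}
print(cs.sample_configuration().get_dictionary())

Because inactive parameters are simply absent from the dictionary, the worker's compute method has to guard its use of conditional parameters (see the num_conv_layers checks below).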

Please refer to the compute method below to see how these parameters are defined using the ConfigSpace package.

When random configurations are sampled, the network does not perform well, but after a few iterations the accuracy should exceed 90%. To speed up training, only 8192 images are used for training and 1024 for validation. The goal is not to reach state-of-the-art performance on MNIST, but to show how to use Keras with HpBandSter and to demonstrate a more complicated search space.

try:
    import keras
    from keras.datasets import mnist
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Flatten
    from keras.layers import Conv2D, MaxPooling2D
    from keras import backend as K
except ImportError:
    raise ImportError("For this example you need to install keras.")



import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

from hpbandster.core.worker import Worker

import logging
logging.basicConfig(level=logging.DEBUG)





class KerasWorker(Worker):
    def __init__(self, N_train=8192, N_valid=1024, **kwargs):
            super().__init__(**kwargs)

            self.batch_size = 64

            img_rows = 28
            img_cols = 28
            self.num_classes = 10

            # the data, split between train and test sets
            (x_train, y_train), (x_test, y_test) = mnist.load_data()

            if K.image_data_format() == 'channels_first':
                    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
                    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
                    self.input_shape = (1, img_rows, img_cols)
            else:
                    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
                    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
                    self.input_shape = (img_rows, img_cols, 1)


            x_train = x_train.astype('float32')
            x_test = x_test.astype('float32')
            # zero-one normalization
            x_train /= 255
            x_test /= 255


            # convert class vectors to binary class matrices
            y_train = keras.utils.to_categorical(y_train, self.num_classes)
            y_test = keras.utils.to_categorical(y_test, self.num_classes)


            # use only subsets of the data to keep training and validation fast
            self.x_train, self.y_train = x_train[:N_train], y_train[:N_train]
            self.x_validation, self.y_validation = x_train[-N_valid:], y_train[-N_valid:]
            self.x_test, self.y_test = x_test, y_test




    def compute(self, config, budget, working_directory, *args, **kwargs):
            """
            Simple example for a compute function using a feed forward network.
            It is trained on the MNIST dataset.
            The input parameter "config" (dictionary) contains the sampled configurations passed by the bohb optimizer
            """

            model = Sequential()

            # the first conv layer is always present and defines the input shape
            model.add(Conv2D(config['num_filters_1'], kernel_size=(3, 3),
                             activation='relu',
                             input_shape=self.input_shape))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            # conditional hyperparameters: the filter counts for layers 2 and 3
            # only exist in the config if num_conv_layers activates them
            if config['num_conv_layers'] > 1:
                    model.add(Conv2D(config['num_filters_2'], kernel_size=(3, 3),
                                     activation='relu'))
                    model.add(MaxPooling2D(pool_size=(2, 2)))

            if config['num_conv_layers'] > 2:
                    model.add(Conv2D(config['num_filters_3'], kernel_size=(3, 3),
                                     activation='relu'))
                    model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Dropout(config['dropout_rate']))
            model.add(Flatten())
            model.add(Dense(config['num_fc_units'], activation='relu'))
            model.add(Dropout(config['dropout_rate']))
            model.add(Dense(self.num_classes, activation='softmax'))


            if config['optimizer'] == 'Adam':
                    optimizer = keras.optimizers.Adam(lr=config['lr'])
            else:
                    optimizer = keras.optimizers.SGD(lr=config['lr'], momentum=config['sgd_momentum'])

            model.compile(loss=keras.losses.categorical_crossentropy,
                          optimizer=optimizer,
                          metrics=['accuracy'])

            # the budget is interpreted as the number of training epochs
            model.fit(self.x_train, self.y_train,
                      batch_size=self.batch_size,
                      epochs=int(budget),
                      verbose=0,
                      validation_data=(self.x_validation, self.y_validation))

            train_score = model.evaluate(self.x_train, self.y_train, verbose=0)
            val_score = model.evaluate(self.x_validation, self.y_validation, verbose=0)
            test_score = model.evaluate(self.x_test, self.y_test, verbose=0)

            return {
                    'loss': 1 - val_score[1],  # remember: HpBandSter always minimizes!
                    'info': {
                            'test accuracy': test_score[1],
                            'train accuracy': train_score[1],
                            'validation accuracy': val_score[1],
                            'number of parameters': model.count_params(),
                    },
            }


    @staticmethod
    def get_configspace():
            """
            It builds the configuration space with the needed hyperparameters.
            It is easily possible to implement different types of hyperparameters.
            Beside float-hyperparameters on a log scale, it is also able to handle categorical input parameter.
            :return: ConfigurationsSpace-Object
            """
            cs = CS.ConfigurationSpace()

            lr = CSH.UniformFloatHyperparameter('lr', lower=1e-6, upper=1e-1, default_value=1e-2, log=True)

            # For demonstration purposes, we add different optimizers as categorical hyperparameters.
            # To show how to use conditional hyperparameters with ConfigSpace, we'll add the optimizers 'Adam' and 'SGD'.
            # SGD has a different parameter 'momentum'.
            optimizer = CSH.CategoricalHyperparameter('optimizer', ['Adam', 'SGD'])

            sgd_momentum = CSH.UniformFloatHyperparameter('sgd_momentum', lower=0.0, upper=0.99, default_value=0.9, log=False)

            cs.add_hyperparameters([lr, optimizer, sgd_momentum])



            num_conv_layers = CSH.UniformIntegerHyperparameter('num_conv_layers', lower=1, upper=3, default_value=2)

            num_filters_1 = CSH.UniformIntegerHyperparameter('num_filters_1', lower=4, upper=64, default_value=16, log=True)
            num_filters_2 = CSH.UniformIntegerHyperparameter('num_filters_2', lower=4, upper=64, default_value=16, log=True)
            num_filters_3 = CSH.UniformIntegerHyperparameter('num_filters_3', lower=4, upper=64, default_value=16, log=True)

            cs.add_hyperparameters([num_conv_layers, num_filters_1, num_filters_2, num_filters_3])


            dropout_rate = CSH.UniformFloatHyperparameter('dropout_rate', lower=0.0, upper=0.9, default_value=0.5, log=False)
            num_fc_units = CSH.UniformIntegerHyperparameter('num_fc_units', lower=8, upper=256, default_value=32, log=True)

            cs.add_hyperparameters([dropout_rate, num_fc_units])


            # The hyperparameter sgd_momentum will only be used if the configuration
            # contains 'SGD' as the optimizer.
            cond = CS.EqualsCondition(sgd_momentum, optimizer, 'SGD')
            cs.add_condition(cond)

            # You can also use inequality conditions:
            cond = CS.GreaterThanCondition(num_filters_2, num_conv_layers, 1)
            cs.add_condition(cond)

            cond = CS.GreaterThanCondition(num_filters_3, num_conv_layers, 2)
            cs.add_condition(cond)

            return cs




if __name__ == "__main__":
    worker = KerasWorker(run_id='0')
    cs = worker.get_configspace()

    config = cs.sample_configuration().get_dictionary()
    print(config)
    res = worker.compute(config=config, budget=1, working_directory='.')
    print(res)
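
Running the script as above only evaluates a single random configuration locally, which is useful for debugging the worker. In an actual optimization run, the worker is driven by an HpBandSter optimizer. A minimal sketch of how this worker could be hooked up to BOHB follows; the run id, host, budgets, and iteration count are illustrative choices, not part of this example:

import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB

# start a nameserver for local communication between optimizer and worker
NS = hpns.NameServer(run_id='example5', host='127.0.0.1', port=None)
NS.start()

# run the worker in the background so the optimizer can dispatch jobs to it
worker = KerasWorker(run_id='example5', nameserver='127.0.0.1')
worker.run(background=True)

# budgets are numbers of training epochs, as interpreted by compute()
bohb = BOHB(configspace=KerasWorker.get_configspace(),
            run_id='example5', nameserver='127.0.0.1',
            min_budget=1, max_budget=9)
res = bohb.run(n_iterations=4)

# clean shutdown of workers and nameserver
bohb.shutdown(shutdown_workers=True)
NS.shutdown()

print(res.get_incumbent_id())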
