顺序用法

默认情况下，auto-sklearn 在拟合机器学习模型的同时并行地构建它们的集成。但是，也可以让这两个过程按顺序执行。下面的示例展示了如何先拟合所有模型，然后再构建集成。

from pprint import pprint

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

数据加载

from autosklearn.ensembles.ensemble_selection import EnsembleSelection

# Load the breast cancer dataset as a (features, targets) pair and split it
# into training and test subsets; random_state=1 fixes the shuffle so the
# split is reproducible.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

构建并拟合分类器

# Configure the classifier. With ensemble_class=None no ensemble is requested
# here; it is constructed explicitly by the fit_ensemble() call further down.
automl = autosklearn.classification.AutoSklearnClassifier(
    # Overall time budget for the search (seconds, per the auto-sklearn docs).
    time_left_for_this_task=60,
    tmp_folder="/tmp/autosklearn_sequential_example_tmp",
    # Do not construct ensembles in parallel to avoid using more than one
    # core at a time. The ensemble will be constructed after auto-sklearn
    # has finished fitting all machine learning models.
    ensemble_class=None,
    # Keep the temporary folder after the run -- presumably so the fitted
    # models stay available to fit_ensemble(); TODO confirm against the docs.
    delete_tmp_folder_after_terminate=False,
)
# Step 1: search for and fit the individual models only.
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Step 2: this call to fit_ensemble uses all models trained in the previous
# call to fit to build an ensemble which can be used with automl.predict().
automl.fit_ensemble(y_train, ensemble_class=EnsembleSelection)

RunKey(config_id=1, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.138148307800293, status=<StatusType.SUCCESS: 1>, starttime=1663665046.729183, endtime=1663665048.8936107, additional_info={'duration': 2.025052070617676, 'num_run': 2, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=2, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.1886756420135498, status=<StatusType.SUCCESS: 1>, starttime=1663665048.8985617, endtime=1663665050.113383, additional_info={'duration': 1.0999813079833984, 'num_run': 3, 'train_loss': 0.01754385964912286, 'configuration_origin': 'Initial design'})
RunKey(config_id=3, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.05673758865248224, time=1.8330206871032715, status=<StatusType.SUCCESS: 1>, starttime=1663665050.1179628, endtime=1663665051.9798045, additional_info={'duration': 1.750197410583496, 'num_run': 4, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=4, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.5171611309051514, status=<StatusType.SUCCESS: 1>, starttime=1663665051.984322, endtime=1663665054.5282989, additional_info={'duration': 2.3936502933502197, 'num_run': 5, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=5, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.2775936126708984, status=<StatusType.SUCCESS: 1>, starttime=1663665054.5333724, endtime=1663665055.84306, additional_info={'duration': 1.198927640914917, 'num_run': 6, 'train_loss': 0.024561403508771895, 'configuration_origin': 'Initial design'})
RunKey(config_id=6, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.014184397163120588, time=1.840346336364746, status=<StatusType.SUCCESS: 1>, starttime=1663665055.8481805, endtime=1663665057.716139, additional_info={'duration': 1.7313201427459717, 'num_run': 7, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=7, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.512295961380005, status=<StatusType.SUCCESS: 1>, starttime=1663665057.7238793, endtime=1663665060.265423, additional_info={'duration': 2.364440441131592, 'num_run': 8, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=8, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.04255319148936165, time=2.1315276622772217, status=<StatusType.SUCCESS: 1>, starttime=1663665060.271228, endtime=1663665062.429951, additional_info={'duration': 2.0131988525390625, 'num_run': 9, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=9, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.4673690795898438, status=<StatusType.SUCCESS: 1>, starttime=1663665062.4357092, endtime=1663665064.9292157, additional_info={'duration': 2.3498237133026123, 'num_run': 10, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=10, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.5794708728790283, status=<StatusType.SUCCESS: 1>, starttime=1663665064.934585, endtime=1663665067.5437174, additional_info={'duration': 2.4640066623687744, 'num_run': 11, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=11, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=1.5726971626281738, status=<StatusType.SUCCESS: 1>, starttime=1663665067.5497928, endtime=1663665069.1497045, additional_info={'duration': 1.4734737873077393, 'num_run': 12, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=12, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.610743522644043, status=<StatusType.SUCCESS: 1>, starttime=1663665069.156014, endtime=1663665070.7938027, additional_info={'duration': 1.5205156803131104, 'num_run': 13, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=13, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.3221628665924072, status=<StatusType.SUCCESS: 1>, starttime=1663665070.8001842, endtime=1663665073.1480403, additional_info={'duration': 2.2304294109344482, 'num_run': 14, 'train_loss': 0.010526315789473717, 'configuration_origin': 'Initial design'})
RunKey(config_id=14, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.049645390070921946, time=5.315098762512207, status=<StatusType.SUCCESS: 1>, starttime=1663665073.1545255, endtime=1663665078.4978535, additional_info={'duration': 5.217244863510132, 'num_run': 15, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=15, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.021276595744680882, time=1.2758090496063232, status=<StatusType.SUCCESS: 1>, starttime=1663665078.505013, endtime=1663665079.8135793, additional_info={'duration': 1.1897251605987549, 'num_run': 16, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=16, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.031766176223755, status=<StatusType.SUCCESS: 1>, starttime=1663665079.820808, endtime=1663665081.878261, additional_info={'duration': 1.929640769958496, 'num_run': 17, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=17, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.4669880867004395, status=<StatusType.SUCCESS: 1>, starttime=1663665081.8850935, endtime=1663665084.380212, additional_info={'duration': 2.343514919281006, 'num_run': 18, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=18, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=3.1608927249908447, status=<StatusType.SUCCESS: 1>, starttime=1663665084.3869042, endtime=1663665087.5789628, additional_info={'duration': 3.045881748199463, 'num_run': 19, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=19, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.07801418439716312, time=0.8530073165893555, status=<StatusType.SUCCESS: 1>, starttime=1663665087.5859113, endtime=1663665088.4659152, additional_info={'duration': 0.7710719108581543, 'num_run': 20, 'train_loss': 0.10526315789473684, 'configuration_origin': 'Initial design'})
RunKey(config_id=20, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.021276595744680882, time=1.751319169998169, status=<StatusType.SUCCESS: 1>, starttime=1663665088.4732761, endtime=1663665090.2543724, additional_info={'duration': 1.6337840557098389, 'num_run': 21, 'train_loss': 0.007017543859649145, 'configuration_origin': 'Initial design'})
RunKey(config_id=21, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.2900927066802979, status=<StatusType.SUCCESS: 1>, starttime=1663665090.261764, endtime=1663665091.5791755, additional_info={'duration': 1.198190450668335, 'num_run': 22, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=22, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=1.0, time=2.0078036785125732, status=<StatusType.TIMEOUT: 2>, starttime=1663665091.5878708, endtime=1663665094.6180243, additional_info={'error': 'Timeout', 'configuration_origin': 'Initial design'})
RunKey(config_id=23, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=1.0, time=0.0, status=<StatusType.STOP: 8>, starttime=1663665094.6260393, endtime=1663665094.6260395, additional_info={})

AutoSklearnClassifier(delete_tmp_folder_after_terminate=False,
                      ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      per_run_time_limit=6, time_left_for_this_task=60,
                      tmp_folder='/tmp/autosklearn_sequential_example_tmp')

打印 auto-sklearn 构建的最终集成模型

# Pretty-print the mapping returned by show_models(), which describes each
# model that is part of the final ensemble.
pprint(automl.show_models(), indent=4)

{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca44a700>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cb155220>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca44ac40>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d62efdf0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1a8ddf0>,
           'ensemble_weight': 0.08,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb9ac4c0>,
           'model_id': 3,
           'rank': 2,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
              beta_2=0.9, early_stopping=True,
              hidden_layer_sizes=(115, 115, 115),
              learning_rate_init=0.00018009776276177523, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d237e820>,
           'cost': 0.05673758865248224,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ccc37160>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d237ed60>,
           'model_id': 4,
           'rank': 3,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.00021148999718383549, beta_1=0.999,
              beta_2=0.9, hidden_layer_sizes=(113, 113, 113),
              learning_rate_init=0.0007452270241186694, max_iter=64,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d248cca0>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cac0c130>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62b91c0>,
           'model_id': 5,
           'rank': 4,
           'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3c54be0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61c9460>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3c54340>,
           'model_id': 6,
           'rank': 5,
           'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
              early_stopping=True, hidden_layer_sizes=(101, 101),
              learning_rate_init=0.0004684917334431039, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ccb9f0a0>,
           'cost': 0.014184397163120588,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d611d7c0>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ccb9f220>,
           'model_id': 7,
           'rank': 6,
           'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    8: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce5b4910>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d19ad430>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c36700>,
           'model_id': 8,
           'rank': 7,
           'sklearn_classifier': RandomForestClassifier(max_features=2, min_samples_leaf=2, n_estimators=512,
                       n_jobs=1, random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca268e20>,
           'cost': 0.04255319148936165,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c0ef10>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d41a25e0>,
           'model_id': 9,
           'rank': 8,
           'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=512,
                     n_jobs=1, random_state=1, warm_start=True)},
    10: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc79aa60>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3b377f0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc79ab80>,
            'model_id': 10,
            'rank': 9,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=4, min_samples_split=6,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    11: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35640>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca2753a0>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d351f0>,
            'model_id': 11,
            'rank': 10,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=23, min_samples_leaf=7,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    12: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d25bf4c0>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cd0af160>,
            'ensemble_weight': 0.1,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d25bffa0>,
            'model_id': 12,
            'rank': 11,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=0.005326508887463406,
                               learning_rate=0.060800813211425456, max_iter=512,
                               max_leaf_nodes=6, min_samples_leaf=5,
                               n_iter_no_change=5, random_state=1,
                               validation_fraction=None, warm_start=True)},
    13: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d245a3a0>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca5617c0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3dfde50>,
            'model_id': 13,
            'rank': 12,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False,
                               l2_regularization=1.0647401999412075e-10,
                               learning_rate=0.08291320147381159, max_iter=512,
                               max_leaf_nodes=39, n_iter_no_change=0,
                               random_state=1, validation_fraction=None,
                               warm_start=True)},
    14: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c1d640>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d626b100>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cdc3a790>,
            'model_id': 14,
            'rank': 13,
            'sklearn_classifier': MLPClassifier(activation='tanh', alpha=2.5550223982458062e-06, beta_1=0.999,
              beta_2=0.9, hidden_layer_sizes=(54, 54, 54),
              learning_rate_init=0.00027271287919467994, max_iter=256,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    15: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb9ac5b0>,
            'cost': 0.049645390070921946,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d684e130>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d47b13d0>,
            'model_id': 15,
            'rank': 14,
            'sklearn_classifier': MLPClassifier(alpha=4.2841884333778574e-06, beta_1=0.999, beta_2=0.9,
              hidden_layer_sizes=(263, 263, 263),
              learning_rate_init=0.0011804284312897009, max_iter=128,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    16: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e68760>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca44a310>,
            'ensemble_weight': 0.08,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e68d30>,
            'model_id': 16,
            'rank': 15,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.387912939529945e-10,
                               learning_rate=0.30755227194768237, max_iter=128,
                               max_leaf_nodes=60, min_samples_leaf=39,
                               n_iter_no_change=18, random_state=1,
                               validation_fraction=None, warm_start=True)},
    17: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b7c880>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cdc31c10>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6403670>,
            'model_id': 17,
            'rank': 16,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=0.4635442279519353,
                               learning_rate=0.09809681787962342, max_iter=512,
                               max_leaf_nodes=328, min_samples_leaf=2,
                               n_iter_no_change=2, random_state=1,
                               validation_fraction=None, warm_start=True)},
    18: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc61fdc0>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62ef2b0>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1035610>,
            'model_id': 18,
            'rank': 17,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, n_estimators=512,
                       n_jobs=1, random_state=1, warm_start=True)},
    19: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d179c040>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4831760>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc636d60>,
            'model_id': 19,
            'rank': 18,
            'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=448, min_samples_leaf=2,
                     min_samples_split=20, n_estimators=512, n_jobs=1,
                     random_state=1, warm_start=True)},
    20: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d474c9d0>,
            'cost': 0.07801418439716312,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f2eb80>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d474c250>,
            'model_id': 20,
            'rank': 19,
            'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=32, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
    21: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cd03d2e0>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d103b250>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cd040e50>,
            'model_id': 21,
            'rank': 20,
            'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=4, min_samples_leaf=2,
                     min_samples_split=15, n_estimators=512, n_jobs=1,
                     random_state=1, warm_start=True)},
    22: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d400fa30>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1fa2190>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1fbf820>,
            'model_id': 22,
            'rank': 21,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=8.057778875694463e-05,
                               learning_rate=0.09179220974965213, max_iter=256,
                               max_leaf_nodes=200, n_iter_no_change=18,
                               random_state=1,
                               validation_fraction=0.14295295806077554,
                               warm_start=True)}}

获取最终集成模型的得分

# Predict on the held-out test split, print auto-sklearn's run summary, and
# report the accuracy of the predictions against the true test labels.
predictions = automl.predict(X_test)
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.985816
  Number of target algorithm runs: 22
  Number of successful target algorithm runs: 21
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.9440559440559441

脚本总运行时间：（0 分 58.297 秒）

由 Sphinx-Gallery 生成的画廊