注意
点击 这里 下载完整的示例代码或通过 Binder 在浏览器中运行此示例
顺序用法
默认情况下，auto-sklearn 会并行地拟合机器学习模型并构建它们的集成。不过，这两个过程也可以按顺序执行。下面的示例展示了如何先拟合模型，然后再构建集成。
from pprint import pprint
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
数据加载
from autosklearn.ensembles.ensemble_selection import EnsembleSelection
# Load the breast cancer dataset; return_X_y=True yields the data and the
# target as two separate objects instead of a single Bunch.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Split into train and test parts. random_state=1 is passed so the split
# is the same on every run of this example.
split_parts = sklearn.model_selection.train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts
构建并拟合分类器
# Step 1: configure the model search. ensemble_class=None disables ensemble
# construction while models are being fitted, so no more than one core is
# used at a time; the ensemble is built only after every model is fitted.
automl = autosklearn.classification.AutoSklearnClassifier(
    ensemble_class=None,
    time_left_for_this_task=60,
    tmp_folder="/tmp/autosklearn_sequential_example_tmp",
    delete_tmp_folder_after_terminate=False,
)

# Step 2: fit the individual machine learning models (no ensemble yet).
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Step 3: build the ensemble from all models trained by the fit() call
# above. After this, automl.predict() can be used.
automl.fit_ensemble(y_train, ensemble_class=EnsembleSelection)
RunKey(config_id=1, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.138148307800293, status=<StatusType.SUCCESS: 1>, starttime=1663665046.729183, endtime=1663665048.8936107, additional_info={'duration': 2.025052070617676, 'num_run': 2, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=2, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.1886756420135498, status=<StatusType.SUCCESS: 1>, starttime=1663665048.8985617, endtime=1663665050.113383, additional_info={'duration': 1.0999813079833984, 'num_run': 3, 'train_loss': 0.01754385964912286, 'configuration_origin': 'Initial design'})
RunKey(config_id=3, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.05673758865248224, time=1.8330206871032715, status=<StatusType.SUCCESS: 1>, starttime=1663665050.1179628, endtime=1663665051.9798045, additional_info={'duration': 1.750197410583496, 'num_run': 4, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=4, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.5171611309051514, status=<StatusType.SUCCESS: 1>, starttime=1663665051.984322, endtime=1663665054.5282989, additional_info={'duration': 2.3936502933502197, 'num_run': 5, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=5, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.2775936126708984, status=<StatusType.SUCCESS: 1>, starttime=1663665054.5333724, endtime=1663665055.84306, additional_info={'duration': 1.198927640914917, 'num_run': 6, 'train_loss': 0.024561403508771895, 'configuration_origin': 'Initial design'})
RunKey(config_id=6, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.014184397163120588, time=1.840346336364746, status=<StatusType.SUCCESS: 1>, starttime=1663665055.8481805, endtime=1663665057.716139, additional_info={'duration': 1.7313201427459717, 'num_run': 7, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=7, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.512295961380005, status=<StatusType.SUCCESS: 1>, starttime=1663665057.7238793, endtime=1663665060.265423, additional_info={'duration': 2.364440441131592, 'num_run': 8, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=8, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.04255319148936165, time=2.1315276622772217, status=<StatusType.SUCCESS: 1>, starttime=1663665060.271228, endtime=1663665062.429951, additional_info={'duration': 2.0131988525390625, 'num_run': 9, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=9, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.4673690795898438, status=<StatusType.SUCCESS: 1>, starttime=1663665062.4357092, endtime=1663665064.9292157, additional_info={'duration': 2.3498237133026123, 'num_run': 10, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=10, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.5794708728790283, status=<StatusType.SUCCESS: 1>, starttime=1663665064.934585, endtime=1663665067.5437174, additional_info={'duration': 2.4640066623687744, 'num_run': 11, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=11, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=1.5726971626281738, status=<StatusType.SUCCESS: 1>, starttime=1663665067.5497928, endtime=1663665069.1497045, additional_info={'duration': 1.4734737873077393, 'num_run': 12, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=12, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.610743522644043, status=<StatusType.SUCCESS: 1>, starttime=1663665069.156014, endtime=1663665070.7938027, additional_info={'duration': 1.5205156803131104, 'num_run': 13, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=13, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=2.3221628665924072, status=<StatusType.SUCCESS: 1>, starttime=1663665070.8001842, endtime=1663665073.1480403, additional_info={'duration': 2.2304294109344482, 'num_run': 14, 'train_loss': 0.010526315789473717, 'configuration_origin': 'Initial design'})
RunKey(config_id=14, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.049645390070921946, time=5.315098762512207, status=<StatusType.SUCCESS: 1>, starttime=1663665073.1545255, endtime=1663665078.4978535, additional_info={'duration': 5.217244863510132, 'num_run': 15, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=15, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.021276595744680882, time=1.2758090496063232, status=<StatusType.SUCCESS: 1>, starttime=1663665078.505013, endtime=1663665079.8135793, additional_info={'duration': 1.1897251605987549, 'num_run': 16, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=16, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.031766176223755, status=<StatusType.SUCCESS: 1>, starttime=1663665079.820808, endtime=1663665081.878261, additional_info={'duration': 1.929640769958496, 'num_run': 17, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=17, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.03546099290780147, time=2.4669880867004395, status=<StatusType.SUCCESS: 1>, starttime=1663665081.8850935, endtime=1663665084.380212, additional_info={'duration': 2.343514919281006, 'num_run': 18, 'train_loss': 0.0, 'configuration_origin': 'Initial design'})
RunKey(config_id=18, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=3.1608927249908447, status=<StatusType.SUCCESS: 1>, starttime=1663665084.3869042, endtime=1663665087.5789628, additional_info={'duration': 3.045881748199463, 'num_run': 19, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=19, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.07801418439716312, time=0.8530073165893555, status=<StatusType.SUCCESS: 1>, starttime=1663665087.5859113, endtime=1663665088.4659152, additional_info={'duration': 0.7710719108581543, 'num_run': 20, 'train_loss': 0.10526315789473684, 'configuration_origin': 'Initial design'})
RunKey(config_id=20, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.021276595744680882, time=1.751319169998169, status=<StatusType.SUCCESS: 1>, starttime=1663665088.4732761, endtime=1663665090.2543724, additional_info={'duration': 1.6337840557098389, 'num_run': 21, 'train_loss': 0.007017543859649145, 'configuration_origin': 'Initial design'})
RunKey(config_id=21, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=0.028368794326241176, time=1.2900927066802979, status=<StatusType.SUCCESS: 1>, starttime=1663665090.261764, endtime=1663665091.5791755, additional_info={'duration': 1.198190450668335, 'num_run': 22, 'train_loss': 0.0035087719298245723, 'configuration_origin': 'Initial design'})
RunKey(config_id=22, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=1.0, time=2.0078036785125732, status=<StatusType.TIMEOUT: 2>, starttime=1663665091.5878708, endtime=1663665094.6180243, additional_info={'error': 'Timeout', 'configuration_origin': 'Initial design'})
RunKey(config_id=23, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=0.0) RunValue(cost=1.0, time=0.0, status=<StatusType.STOP: 8>, starttime=1663665094.6260393, endtime=1663665094.6260395, additional_info={})
AutoSklearnClassifier(delete_tmp_folder_after_terminate=False,
ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
per_run_time_limit=6, time_left_for_this_task=60,
tmp_folder='/tmp/autosklearn_sequential_example_tmp')
打印 auto-sklearn 构建的最终集成模型
# Fetch the description of the models in the final ensemble and
# pretty-print it with a four-space indent.
ensemble_members = automl.show_models()
pprint(ensemble_members, indent=4)
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca44a700>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cb155220>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca44ac40>,
'model_id': 2,
'rank': 1,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
3: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d62efdf0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1a8ddf0>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb9ac4c0>,
'model_id': 3,
'rank': 2,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
beta_2=0.9, early_stopping=True,
hidden_layer_sizes=(115, 115, 115),
learning_rate_init=0.00018009776276177523, max_iter=32,
n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
4: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d237e820>,
'cost': 0.05673758865248224,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ccc37160>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d237ed60>,
'model_id': 4,
'rank': 3,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.00021148999718383549, beta_1=0.999,
beta_2=0.9, hidden_layer_sizes=(113, 113, 113),
learning_rate_init=0.0007452270241186694, max_iter=64,
n_iter_no_change=32, random_state=1, validation_fraction=0.0,
verbose=0, warm_start=True)},
5: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d248cca0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cac0c130>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62b91c0>,
'model_id': 5,
'rank': 4,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
6: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3c54be0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61c9460>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3c54340>,
'model_id': 6,
'rank': 5,
'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
early_stopping=True, hidden_layer_sizes=(101, 101),
learning_rate_init=0.0004684917334431039, max_iter=32,
n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
7: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ccb9f0a0>,
'cost': 0.014184397163120588,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d611d7c0>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ccb9f220>,
'model_id': 7,
'rank': 6,
'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
8: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce5b4910>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d19ad430>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c36700>,
'model_id': 8,
'rank': 7,
'sklearn_classifier': RandomForestClassifier(max_features=2, min_samples_leaf=2, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
9: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca268e20>,
'cost': 0.04255319148936165,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c0ef10>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d41a25e0>,
'model_id': 9,
'rank': 8,
'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
10: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc79aa60>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3b377f0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc79ab80>,
'model_id': 10,
'rank': 9,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=4, min_samples_split=6,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
11: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35640>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca2753a0>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d351f0>,
'model_id': 11,
'rank': 10,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=23, min_samples_leaf=7,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
12: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d25bf4c0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cd0af160>,
'ensemble_weight': 0.1,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d25bffa0>,
'model_id': 12,
'rank': 11,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=0.005326508887463406,
learning_rate=0.060800813211425456, max_iter=512,
max_leaf_nodes=6, min_samples_leaf=5,
n_iter_no_change=5, random_state=1,
validation_fraction=None, warm_start=True)},
13: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d245a3a0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca5617c0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3dfde50>,
'model_id': 13,
'rank': 12,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False,
l2_regularization=1.0647401999412075e-10,
learning_rate=0.08291320147381159, max_iter=512,
max_leaf_nodes=39, n_iter_no_change=0,
random_state=1, validation_fraction=None,
warm_start=True)},
14: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c1d640>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d626b100>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cdc3a790>,
'model_id': 14,
'rank': 13,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=2.5550223982458062e-06, beta_1=0.999,
beta_2=0.9, hidden_layer_sizes=(54, 54, 54),
learning_rate_init=0.00027271287919467994, max_iter=256,
n_iter_no_change=32, random_state=1, validation_fraction=0.0,
verbose=0, warm_start=True)},
15: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb9ac5b0>,
'cost': 0.049645390070921946,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d684e130>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d47b13d0>,
'model_id': 15,
'rank': 14,
'sklearn_classifier': MLPClassifier(alpha=4.2841884333778574e-06, beta_1=0.999, beta_2=0.9,
hidden_layer_sizes=(263, 263, 263),
learning_rate_init=0.0011804284312897009, max_iter=128,
n_iter_no_change=32, random_state=1, validation_fraction=0.0,
verbose=0, warm_start=True)},
16: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e68760>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca44a310>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e68d30>,
'model_id': 16,
'rank': 15,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.387912939529945e-10,
learning_rate=0.30755227194768237, max_iter=128,
max_leaf_nodes=60, min_samples_leaf=39,
n_iter_no_change=18, random_state=1,
validation_fraction=None, warm_start=True)},
17: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b7c880>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cdc31c10>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6403670>,
'model_id': 17,
'rank': 16,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=0.4635442279519353,
learning_rate=0.09809681787962342, max_iter=512,
max_leaf_nodes=328, min_samples_leaf=2,
n_iter_no_change=2, random_state=1,
validation_fraction=None, warm_start=True)},
18: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc61fdc0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62ef2b0>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1035610>,
'model_id': 18,
'rank': 17,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
19: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d179c040>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4831760>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc636d60>,
'model_id': 19,
'rank': 18,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=448, min_samples_leaf=2,
min_samples_split=20, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
20: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d474c9d0>,
'cost': 0.07801418439716312,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f2eb80>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d474c250>,
'model_id': 20,
'rank': 19,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=32, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
21: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cd03d2e0>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d103b250>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cd040e50>,
'model_id': 21,
'rank': 20,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=4, min_samples_leaf=2,
min_samples_split=15, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
22: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d400fa30>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1fa2190>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1fbf820>,
'model_id': 22,
'rank': 21,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=8.057778875694463e-05,
learning_rate=0.09179220974965213, max_iter=256,
max_leaf_nodes=200, n_iter_no_change=18,
random_state=1,
validation_fraction=0.14295295806077554,
warm_start=True)}}
获取最终集成模型的得分
# Predict on the held-out test split with the fitted ensemble.
predictions = automl.predict(X_test)

# Print the run summary first, then the accuracy on the test split.
print(automl.sprint_statistics())
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", test_accuracy)
auto-sklearn results:
Dataset name: breast_cancer
Metric: accuracy
Best validation score: 0.985816
Number of target algorithm runs: 22
Number of successful target algorithm runs: 21
Number of crashed target algorithm runs: 0
Number of target algorithms that exceeded the time limit: 1
Number of target algorithms that exceeded the memory limit: 0
Accuracy score 0.9440559440559441
脚本总运行时间：（0 分 58.297 秒）